1 Abstract

2 Introduction

3 Data

  • Data come from three sources: DOAJ, OpenEditors, and the Australian ERA journal evaluation list.

  • OpenEditors data are cleaned for character sets, white space, and missing ISSNs in editor records (recovered by matching to other databases on publisher and title).

  • Of XXX total journals in OpenEditors, YYY are matched

  • Data are merged by ISSN: subject fields come from ERA, and the open-license indicator comes from DOAJ.
### Join editors with journal information
# Start from the cleaned editor records; outside debug runs the raw inputs are
# removed from the workspace.
editors_join.tbl <- editors_clean.tbl
if (!doc_debug) {
    rm("editors_raw.tbl","era_raw.tbl")
}

# Attach the DOAJ "Journal license" by ISSN and derive the open-licence flag:
#   license         - the DOAJ licence, or "none" when the join found no licence
#   IND_openlicense - TRUE when a licence is recorded, FALSE when it is "none",
#                     and NA when the editor record has an empty ISSN
#                     (presumably because such a record cannot be matched).
# Every step here is vectorised, so the table is not switched to rowwise();
# the later per-row steps call rowwise() themselves.
editors_join.tbl %<>%
  left_join(doaj.tbl %>% select(issn, "Journal license"), by = "issn") %>%
  rename(license = "Journal license") %>%
  mutate(
    license = ifelse(is.na(license), "none", license),
    IND_openlicense = ifelse(issn == "", NA, license != "none")
  )

# Attach the ERA subject codes by ISSN.
# NOTE(review): empty ISSNs are not special-cased for this join - confirm that
# era.tbl has no rows with an empty issn.
editors_join.tbl %<>% left_join(era.tbl %>% select(issn, subjects), by = "issn")
  • Fields parsed
  • first name: extracted from full name (using humaniformat), with preprocessing to remove titles (“Dr.”, “Professor”)
  • country: parsed from affiliation, validated with gazetteer
### extract given names for gender analysis
editors_parse.tbl <- editors_join.tbl
if (!doc_debug) {
    rm("editors_clean.tbl")
}

# first_name() fails on empty string, wrap it so a failure yields "" instead
safe_first_name <- possibly(first_name, otherwise = "")

# The table is processed row by row so that a failure of first_name() on one
# name only blanks that row.
editors_parse.tbl %<>%
  rowwise() %>%
  mutate(
    # remove honorifics
    # - str_replace_all() strips every title, so stacked titles such as
    #   "Prof. Dr." do not leave one behind to be taken for the given name;
    # - the word boundaries stop the pattern from matching inside a name
    #   (e.g. the start of "Doctorow").
    LS_FULLNAME = str_squish(
      str_replace_all(editor, '\\b(Dr|Prof|Doctor|Professor)\\b\\.?', '')
    ),
    LS_GIVENNAME = safe_first_name(LS_FULLNAME),
    # post-cleanup
    # an empty result, a single letter, or a token containing a period or
    # comma is a last name or an abbreviation rather than a given name
    LS_GIVENNAME = case_when(
      LS_GIVENNAME == "" ~ NA_character_,
      str_length(LS_GIVENNAME) == 1 ~ NA_character_,
      str_detect(LS_GIVENNAME, '[.,]') ~ NA_character_,
      TRUE ~ LS_GIVENNAME
    )
  )
### extract country using geotext
editors_parse_c.tbl <- editors_parse.tbl
if (!doc_debug) {
    rm("editors_join.tbl")
}

## setup geotext
# Install the Python "geotext" package only when both flags are set.
# `&&` is the scalar, short-circuiting operator meant for if() conditions.
if (doc_refresh_data && doc_debug) {
  py_install(packages = "geotext")
}

# Wrap a callable taken from a Python module so it can be mapped over inputs
# without a single failure aborting the whole run.
#
# Args:
#   module:    name of the Python module, passed to import().
#   importfun: name used to pick the callable out of the imported module
#              (via `[`).
# Returns:
#   A function(xlist, ...) that applies the wrapped callable to each element of
#   xlist, forwarding `...`. An element whose call raises an error yields NA.
#   Results are combined by sapply(simplify = TRUE, USE.NAMES = FALSE).
wrap_python <- function (module,importfun) {
  core_fun <- import(module)
  # possibly() converts an error raised by the wrapped callable into NA
  safe_fun <- possibly(core_fun[importfun], otherwise = NA)
  # Return the mapper explicitly rather than as the (invisible) value of a
  # trailing assignment.
  function (xlist, ...) {
    sapply(xlist, safe_fun, ..., simplify = TRUE, USE.NAMES = FALSE)
  }
}

# Error-tolerant mapper around the "GeoText" entry of the Python geotext module
geotext <- wrap_python("geotext", "GeoText")

## geotext and check against naive parsing

# One row per distinct affiliation string, so each affiliation is parsed once.
affiliations.tbl <- editors_parse_c.tbl %>%
  group_by(affiliation) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  select(affiliation)

# Per affiliation:
#   LS_COUNTRY_CHK  - text after the last comma of the affiliation
#   LS_COUNTRY_G    - first name found under "country_mentions" in the geotext
#                     result for the title-cased LS_COUNTRY_CHK
#                     (presumably a two-letter country code - confirm)
#   LS_COUNTRY_CHK2 - title-cased LS_COUNTRY_CHK
#   LS_COUNTRY      - LS_COUNTRY_G when available, otherwise the first matching
#                     hand-written rule below, otherwise ""
# NOTE(review): unresolved affiliations get "" rather than NA, so is.na() and
# na.omit() downstream do not treat them as missing.
affiliations.tbl %<>%
  rowwise() %>%
  mutate(
    LS_COUNTRY_CHK = tail(unlist(str_split(affiliation, ',')), n = 1),
    LS_COUNTRY_G = names(
      geotext(str_to_title(LS_COUNTRY_CHK))[[1]]["country_mentions"]
    )[1],
    LS_COUNTRY_CHK2 = str_to_title(LS_COUNTRY_CHK),
    LS_COUNTRY = case_when(
      !is.na(LS_COUNTRY_G)                        ~ LS_COUNTRY_G,
      # rules are tried in order; the first match wins
      str_detect(LS_COUNTRY_CHK,  "USA")          ~ "US",
      str_detect(LS_COUNTRY_CHK,  "UK")           ~ "GB",
      str_detect(LS_COUNTRY_CHK2, "Netherlands")  ~ "NL",
      str_detect(LS_COUNTRY_CHK2, "Russia")       ~ "RU",
      str_detect(LS_COUNTRY_CHK2, "Viet Nam")     ~ "VN",
      str_detect(LS_COUNTRY_CHK2, "Korea")        ~ "KR",
      str_detect(LS_COUNTRY_CHK2, "Emirates")     ~ "AE",
      str_detect(LS_COUNTRY_CHK,  "UAE")          ~ "AE",
      str_detect(LS_COUNTRY_CHK,  "CHN")          ~ "CN",
      str_detect(LS_COUNTRY_CHK2, "Brasil")       ~ "BR",
      str_detect(LS_COUNTRY_CHK2, "Scotland")     ~ "GB",
      str_detect(LS_COUNTRY_CHK2, "Singapore")    ~ "SG",
      str_detect(LS_COUNTRY_CHK2, "Trinidad")     ~ "TT",
      str_detect(LS_COUNTRY_CHK,  "KSA")          ~ "SA",
      # last resort: "Korea" anywhere in the full affiliation
      str_detect(affiliation,     "Korea")        ~ "KR",
      TRUE                                        ~ ""
    )
  )

# Bring the resolved country back onto every editor record.
editors_parse_c.tbl %<>%
  left_join(
    affiliations.tbl %>% select(affiliation, LS_COUNTRY),
    by = "affiliation"
  )

4 Methods

4.1 Gender Imputation

Making scholarship more inclusive requires making the characteristics of those participating visible. Because no systematic public data on self-reported author characteristics exist, however, research on participation in scholarly publications must use bibliometric methods to impute gender from author names [see, for example, @lariviere2013bibliometrics]. To impute the gender of editors, we apply a method that is commonly used in scientometric analysis and is based on analysis of historical censuses [@blevins2015jane]. We then use this imputation to explore the inclusion of works authored by men and women over time.1

### impute gender based on name
# Method name handed to gender() (as its `method` argument) when imputing
# gender from given names.
gender_meth <- "ipums"
# TODO:
# - multiple methods analysis
# - fix genderizer timeouts

# gender can fail on genderize method, so wrap it.
#
# Look up the imputed gender for one given name without ever raising an error.
#
# Args:
#   x:   a single given name.
#   ...: forwarded to gender() (e.g. method = ).
# Returns:
#   The first value of the "gender" field of gender()'s result, as a single
#   string; "" when gender() errors, finds no match, or reports NA.
safer_gender <- function(x,...) {
  safe_gender <- possibly(gender, otherwise = list(gender = ""))
  rv <- safe_gender(x, ...)[["gender"]]
  # Check the length first: a zero-length result has nothing to test with
  # is.na(), and a multi-element result must not reach `||`, which requires a
  # single value.
  if (length(rv) == 0 || is.na(rv[[1]])) {
    return("")
  }
  rv[[1]]
}

# Impute gender once per distinct given name (most frequent names first)
# rather than once per editor record.
# NOTE(review): editors_full.tbl is not created in the code shown before this
# point - confirm where it is defined.
nms.tbl <- editors_full.tbl %>%
  count(LS_GIVENNAME) %>%
  arrange(desc(n)) %>%
  rowwise() %>%
  mutate(LS_GENDER = safer_gender(LS_GIVENNAME, method = gender_meth))

# Attach the imputed gender to every editor record; the name frequency `n`
# is not carried over.
editors_full.tbl %<>%
  left_join(
    nms.tbl %>% select(-n),
    by = "LS_GIVENNAME"
  )

# Save the full editor table as a gzip-compressed CSV.
edcsv.file <- "editors_full.csv.gz"
write_csv(editors_full.tbl, edcsv.file)

5 Analysis

# Outside debug runs, drop the intermediate table; in debug runs, reload the
# editor table from the saved CSV instead.
# NOTE(review): read_csv() treats "" and "NA" as missing by default, so a
# reloaded table may encode missing countries/genders differently from the
# in-memory one (and a literal "NA" country code would be lost) - confirm.
if (!doc_debug) {
  rm("editors_parse_c.tbl")
} else {
  editors_full.tbl <- read_csv(
    edcsv.file,
    col_types = list(subjects = col_character())
  )
}

# Analysis table, one row per editor record:
#   FAC_ROLE - CAT_ROLE as an ordered factor (review < editor < chief)
#   IND_MALE - TRUE for "male"; NA when LS_GENDER is "either" or ""
# The remaining columns are renamed to the analysis naming scheme.
editors_analysis.tbl <- editors_full.tbl %>%
  mutate(
    FAC_ROLE = factor(
      CAT_ROLE,
      levels = c("review", "editor", "chief"),
      ordered = TRUE
    ),
    IND_MALE = na_if(na_if(LS_GENDER, "either"), "") == "male"
  ) %>%
  select(
    NM_JOURNAL = journal,
    CAT_PUBLISHER = publisher,
    IND_MALE,
    IND_OPEN = IND_openlicense,
    LS_COUNTRY,
    LS_SUBJECTS = subjects,
    FAC_ROLE
  ) %>%
  ungroup()

### construct editorial board characteristics
# One row per journal and role group (NM_JOURNAL x FAC_ROLE).
#   CAT_PUBLISHER, IND_OPEN  - taken from the group's first record
#   LIST_SUBJECTS            - subject codes of the first record, split on ","
#   N_SUBJECTS               - number of subject codes; 4 when the field is "MD"
#   LIST_ROLEGROUP_COUNTRIES - countries of the group's editors, missing dropped
#   N_ROLEGROUP_COUNTRIES    - number of distinct countries; NA when none known
#   PERCENT_ROLEGROUP_MALE   - mean of IND_MALE over editors with a known
#                              gender, i.e. a proportion in [0, 1]
#                              (NaN when no gender is known)
journal_board_analysis.tbl <- editors_analysis.tbl %>%
  group_by(NM_JOURNAL, FAC_ROLE) %>%
  summarise(
    CAT_PUBLISHER = head(CAT_PUBLISHER, n = 1),
    # NOTE(review): unique() acts on the one-element list returned by
    # str_split(), not on the codes inside it - confirm duplicates cannot occur.
    LIST_SUBJECTS = unique(str_split(head(LS_SUBJECTS, n = 1), ',')),
    # ERA counts 3 subjects, plus "MD" for multidisciplinary
    N_SUBJECTS = ifelse(
      head(LS_SUBJECTS, n = 1) == "MD",
      4,
      length(unlist(LIST_SUBJECTS))
    ),
    IND_OPEN = head(IND_OPEN, n = 1),
    # Unresolved countries are stored as "" in LS_COUNTRY; turn them into NA
    # first so that na.omit() drops them and "" is not counted as a country.
    LIST_ROLEGROUP_COUNTRIES = list(na.omit(na_if(LS_COUNTRY, ""))),
    N_ROLEGROUP_COUNTRIES = length(unique(unlist(LIST_ROLEGROUP_COUNTRIES))),
    N_ROLEGROUP_COUNTRIES = na_if(N_ROLEGROUP_COUNTRIES, 0),
    PERCENT_ROLEGROUP_MALE = mean(IND_MALE, na.rm = TRUE)
  ) %>% ungroup()

# Journal-level table: drop the role-group measures, keep the first row of
# each journal, then drop the journal name itself.
# NOTE(review): FAC_ROLE is kept, so it holds the role of whichever role-group
# row came first for the journal - confirm this is intended.
journal_analysis.tbl <- journal_board_analysis.tbl %>%
  select(
    -c(N_ROLEGROUP_COUNTRIES, PERCENT_ROLEGROUP_MALE, LIST_ROLEGROUP_COUNTRIES)
  ) %>%
  group_by(NM_JOURNAL) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  select(-NM_JOURNAL)

5.1 Journal Characteristics

# Describe every column of the journal-level table except the list column
# LIST_SUBJECTS.
journal_analysis.tbl %>% select(-LIST_SUBJECTS) %>% Desc()
## ------------------------------------------------------------------------------ 
## Describe . (tbl_df, tbl, data.frame):
## 
## data frame:  6080 obs. of  4 variables
##      3598 complete cases (59.2%)
## 
##   Nr  ColName        Class            NAs           Levels           
##   1   FAC_ROLE       ordered, factor    45 (0.7%)   (3): 1-review,   
##                                                     2-editor, 3-chief
##   2   CAT_PUBLISHER  character           .                           
##   3   N_SUBJECTS     numeric          2468 (40.6%)                   
##   4   IND_OPEN       logical          1289 (21.2%)                   
## 
## 
## ------------------------------------------------------------------------------ 
## 1 - FAC_ROLE (ordered, factor)
## 
##   length      n    NAs unique levels  dupes
##    6'080  6'035     45      3      3      y
##           99.3%   0.7%                     
## 
##     level   freq   perc  cumfreq  cumperc
## 1  review  5'129  85.0%    5'129    85.0%
## 2  editor    872  14.4%    6'001    99.4%
## 3   chief     34   0.6%    6'035   100.0%

## ------------------------------------------------------------------------------ 
## 2 - CAT_PUBLISHER (character)
## 
##   length      n    NAs unique levels  dupes
##    6'080  6'080      0     17     17      y
##          100.0%   0.0%                     
## 
##                          level   freq   perc  cumfreq  cumperc
## 1                     Elsevier  2'134  35.1%    2'134    35.1%
## 2                         SAGE  1'191  19.6%    3'325    54.7%
## 3                 Inderscience    470   7.7%    3'795    62.4%
## 4   Cambridge University Press    398   6.5%    4'193    69.0%
## 5                      Emerald    370   6.1%    4'563    75.0%
## 6                        Brill    279   4.6%    4'842    79.6%
## 7                         MDPI    274   4.5%    5'116    84.1%
## 8                      Hindawi    220   3.6%    5'336    87.8%
## 9                   IGI Global    220   3.6%    5'556    91.4%
## 10                    Pleiades    115   1.9%    5'671    93.3%
## 11                      Karger     99   1.6%    5'770    94.9%
## 12             Frontiers Media     92   1.5%    5'862    96.4%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 3 - N_SUBJECTS (numeric)
## 
##   length      n    NAs  unique    0s  mean  meanCI'
##    6'080  3'612  2'468       4     0  1.95    1.92
##           59.4%  40.6%          0.0%          1.98
##                                                   
##      .05    .10    .25  median   .75   .90     .95
##     1.00   1.00   1.00    2.00  3.00  3.00    3.00
##                                                   
##    range     sd  vcoef     mad   IQR  skew    kurt
##     3.00   0.87   0.45    1.48  2.00  0.35   -1.04
##                                                   
## 
##    level   freq   perc  cumfreq  cumperc
## 1      1  1'363  37.7%    1'363    37.7%
## 2      2  1'166  32.3%    2'529    70.0%
## 3      3    984  27.2%    3'513    97.3%
## 4      4     99   2.7%    3'612   100.0%
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 4 - IND_OPEN (logical - dichotomous)
## 
##   length      n    NAs unique
##    6'080  4'791  1'289      2
##           78.8%  21.2%       
## 
##         freq   perc  lci.95  uci.95'
## FALSE  4'000  83.5%   82.4%   84.5%
## TRUE     791  16.5%   15.5%   17.6%
## 
## ' 95%-CI (Wilson)

5.2 Editor Characteristics

# Describe gender, country, role and open-licence status one variable at a
# time (one-sided formula) over all editor records.
editors_analysis.tbl  %>% Desc(formula=~IND_MALE+LS_COUNTRY+FAC_ROLE+IND_OPEN,data=.)
## ------------------------------------------------------------------------------ 
## .$IND_MALE (logical)
## 
##    length       n     NAs  unique
##   478'562 314'469 164'093       2
##             65.7%   34.3%        
## 
##           freq   perc  lci.95  uci.95'
## FALSE  100'097  31.8%   31.7%   32.0%
## TRUE   214'372  68.2%   68.0%   68.3%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## .$LS_COUNTRY (character)
## 
##    length       n     NAs  unique  levels   dupes
##   478'562 478'562       0     191     191       y
##            100.0%    0.0%                        
## 
##     level     freq   perc  cumfreq  cumperc
## 1      US  133'814  28.0%  133'814    28.0%
## 2      GB   39'538   8.3%  173'352    36.2%
## 3      IT   31'812   6.6%  205'164    42.9%
## 4      CN   27'418   5.7%  232'582    48.6%
## 5           20'778   4.3%  253'360    52.9%
## 6      DE   19'935   4.2%  273'295    57.1%
## 7      AU   17'841   3.7%  291'136    60.8%
## 8      FR   16'468   3.4%  307'604    64.3%
## 9      ES   16'154   3.4%  323'758    67.7%
## 10     CA   15'782   3.3%  339'540    71.0%
## 11     JP   11'786   2.5%  351'326    73.4%
## 12     IN    9'928   2.1%  361'254    75.5%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## .$FAC_ROLE (ordered)
## 
##    length       n     NAs  unique  levels   dupes
##   478'562 469'172   9'390       3       3       y
##             98.0%    2.0%                        
## 
##     level     freq   perc  cumfreq  cumperc
## 1  review  317'646  67.7%  317'646    67.7%
## 2  editor  145'296  31.0%  462'942    98.7%
## 3   chief    6'230   1.3%  469'172   100.0%

## ------------------------------------------------------------------------------ 
## .$IND_OPEN (logical)
## 
##    length       n     NAs  unique
##   478'562 430'553  48'009       2
##             90.0%   10.0%        
## 
##           freq   perc  lci.95  uci.95'
## FALSE  197'612  45.9%   45.7%   46.0%
## TRUE   232'941  54.1%   54.0%   54.3%
## 
## ' 95%-CI (Wilson)

# Describe country, role and gender each against IND_OPEN (two-sided formula).
editors_analysis.tbl  %>% Desc(formula=LS_COUNTRY+FAC_ROLE+IND_MALE~IND_OPEN,data=.)
## ------------------------------------------------------------------------------ 
## LS_COUNTRY ~ IND_OPEN (.)
## 
## Summary: 
## n: 430'553, rows: 191, columns: 2
## 
## Pearson's Chi-squared test:
##   X-squared = NaN, df = 190, p-value = NA
## Log likelihood ratio (G-test) test of independence:
##   G = 27789, X-squared df = 190, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
##   X-squared = 970.61, df = 1, p-value < 2.2e-16
## 
## Warning message:
##   Exp. counts < 5: Chi-squared approx. may be incorrect!!
## 
## 
## Phi-Coefficient        NaN
## Contingency Coeff.     NaN
## Cramer's V             NaN
## 
##                                              
##              IND_OPEN   FALSE    TRUE     Sum
## LS_COUNTRY                                   
##              freq      12'023   3'385  15'408
##              perc        2.8%    0.8%    3.6%
##              p.row      78.0%   22.0%       .
##              p.col       6.1%    1.5%       .
##                                              
## AD           freq           2       0       2
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## AE           freq         471     303     774
##              perc        0.1%    0.1%    0.2%
##              p.row      60.9%   39.1%       .
##              p.col       0.2%    0.1%       .
##                                              
## AF           freq           6       0       6
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## AL           freq          21      24      45
##              perc        0.0%    0.0%    0.0%
##              p.row      46.7%   53.3%       .
##              p.col       0.0%    0.0%       .
##                                              
## AM           freq          15      12      27
##              perc        0.0%    0.0%    0.0%
##              p.row      55.6%   44.4%       .
##              p.col       0.0%    0.0%       .
##                                              
## AN           freq           0       2       2
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## AO           freq           3       0       3
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## AR           freq         486   1'119   1'605
##              perc        0.1%    0.3%    0.4%
##              p.row      30.3%   69.7%       .
##              p.col       0.2%    0.5%       .
##                                              
## AS           freq           0       1       1
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## AT           freq       1'290   1'825   3'115
##              perc        0.3%    0.4%    0.7%
##              p.row      41.4%   58.6%       .
##              p.col       0.7%    0.8%       .
##                                              
## AU           freq       8'179   8'196  16'375
##              perc        1.9%    1.9%    3.8%
##              p.row      49.9%   50.1%       .
##              p.col       4.1%    3.5%       .
##                                              
## AZ           freq          10       3      13
##              perc        0.0%    0.0%    0.0%
##              p.row      76.9%   23.1%       .
##              p.col       0.0%    0.0%       .
##                                              
## BA           freq           0       0       0
##              perc        0.0%    0.0%    0.0%
##              p.row    NA      NA            .
##              p.col       0.0%    0.0%       .
##                                              
## BB           freq          13       6      19
##              perc        0.0%    0.0%    0.0%
##              p.row      68.4%   31.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## BD           freq         102     167     269
##              perc        0.0%    0.0%    0.1%
##              p.row      37.9%   62.1%       .
##              p.col       0.1%    0.1%       .
##                                              
## BE           freq       1'618   2'141   3'759
##              perc        0.4%    0.5%    0.9%
##              p.row      43.0%   57.0%       .
##              p.col       0.8%    0.9%       .
##                                              
## BF           freq           8      14      22
##              perc        0.0%    0.0%    0.0%
##              p.row      36.4%   63.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## BG           freq         131     167     298
##              perc        0.0%    0.0%    0.1%
##              p.row      44.0%   56.0%       .
##              p.col       0.1%    0.1%       .
##                                              
## BH           freq          62      30      92
##              perc        0.0%    0.0%    0.0%
##              p.row      67.4%   32.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## BJ           freq           5       2       7
##              perc        0.0%    0.0%    0.0%
##              p.row      71.4%   28.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## BM           freq           2       4       6
##              perc        0.0%    0.0%    0.0%
##              p.row      33.3%   66.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## BN           freq           3      17      20
##              perc        0.0%    0.0%    0.0%
##              p.row      15.0%   85.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## BO           freq           3       6       9
##              perc        0.0%    0.0%    0.0%
##              p.row      33.3%   66.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## BR           freq       1'955   5'006   6'961
##              perc        0.5%    1.2%    1.6%
##              p.row      28.1%   71.9%       .
##              p.col       1.0%    2.1%       .
##                                              
## BS           freq           3       2       5
##              perc        0.0%    0.0%    0.0%
##              p.row      60.0%   40.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## BT           freq           5       1       6
##              perc        0.0%    0.0%    0.0%
##              p.row      83.3%   16.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## BW           freq          22      22      44
##              perc        0.0%    0.0%    0.0%
##              p.row      50.0%   50.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## BY           freq          30      16      46
##              perc        0.0%    0.0%    0.0%
##              p.row      65.2%   34.8%       .
##              p.col       0.0%    0.0%       .
##                                              
## BZ           freq           1       1       2
##              perc        0.0%    0.0%    0.0%
##              p.row      50.0%   50.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## CA           freq       7'170   7'042  14'212
##              perc        1.7%    1.6%    3.3%
##              p.row      50.5%   49.5%       .
##              p.col       3.6%    3.0%       .
##                                              
## CH           freq       2'025   3'681   5'706
##              perc        0.5%    0.9%    1.3%
##              p.row      35.5%   64.5%       .
##              p.col       1.0%    1.6%       .
##                                              
## CI           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## CL           freq         368     932   1'300
##              perc        0.1%    0.2%    0.3%
##              p.row      28.3%   71.7%       .
##              p.col       0.2%    0.4%       .
##                                              
## CM           freq          25      41      66
##              perc        0.0%    0.0%    0.0%
##              p.row      37.9%   62.1%       .
##              p.col       0.0%    0.0%       .
##                                              
## CN           freq       9'212  15'194  24'406
##              perc        2.1%    3.5%    5.7%
##              p.row      37.7%   62.3%       .
##              p.col       4.7%    6.5%       .
##                                              
## CO           freq         181     322     503
##              perc        0.0%    0.1%    0.1%
##              p.row      36.0%   64.0%       .
##              p.col       0.1%    0.1%       .
##                                              
## CR           freq          36      40      76
##              perc        0.0%    0.0%    0.0%
##              p.row      47.4%   52.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## CU           freq          20      47      67
##              perc        0.0%    0.0%    0.0%
##              p.row      29.9%   70.1%       .
##              p.col       0.0%    0.0%       .
##                                              
## CY           freq         245     242     487
##              perc        0.1%    0.1%    0.1%
##              p.row      50.3%   49.7%       .
##              p.col       0.1%    0.1%       .
##                                              
## CZ           freq         360     264     624
##              perc        0.1%    0.1%    0.1%
##              p.row      57.7%   42.3%       .
##              p.col       0.2%    0.1%       .
##                                              
## DE           freq       6'470  11'868  18'338
##              perc        1.5%    2.8%    4.3%
##              p.row      35.3%   64.7%       .
##              p.col       3.3%    5.1%       .
##                                              
## DK           freq       1'483   1'630   3'113
##              perc        0.3%    0.4%    0.7%
##              p.row      47.6%   52.4%       .
##              p.col       0.8%    0.7%       .
##                                              
## DM           freq           0       1       1
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## DO           freq           3      10      13
##              perc        0.0%    0.0%    0.0%
##              p.row      23.1%   76.9%       .
##              p.col       0.0%    0.0%       .
##                                              
## DZ           freq         124      39     163
##              perc        0.0%    0.0%    0.0%
##              p.row      76.1%   23.9%       .
##              p.col       0.1%    0.0%       .
##                                              
## EC           freq          32      53      85
##              perc        0.0%    0.0%    0.0%
##              p.row      37.6%   62.4%       .
##              p.col       0.0%    0.0%       .
##                                              
## EE           freq         112     134     246
##              perc        0.0%    0.0%    0.1%
##              p.row      45.5%   54.5%       .
##              p.col       0.1%    0.1%       .
##                                              
## EG           freq         368     536     904
##              perc        0.1%    0.1%    0.2%
##              p.row      40.7%   59.3%       .
##              p.col       0.2%    0.2%       .
##                                              
## ER           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## ES           freq       3'648  10'213  13'861
##              perc        0.8%    2.4%    3.2%
##              p.row      26.3%   73.7%       .
##              p.col       1.8%    4.4%       .
##                                              
## ET           freq          40      82     122
##              perc        0.0%    0.0%    0.0%
##              p.row      32.8%   67.2%       .
##              p.col       0.0%    0.0%       .
##                                              
## FI           freq       1'397   1'191   2'588
##              perc        0.3%    0.3%    0.6%
##              p.row      54.0%   46.0%       .
##              p.col       0.7%    0.5%       .
##                                              
## FJ           freq          25      19      44
##              perc        0.0%    0.0%    0.0%
##              p.row      56.8%   43.2%       .
##              p.col       0.0%    0.0%       .
##                                              
## FK           freq           0       2       2
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## FO           freq           4       2       6
##              perc        0.0%    0.0%    0.0%
##              p.row      66.7%   33.3%       .
##              p.col       0.0%    0.0%       .
##                                              
## FR           freq       5'025   9'062  14'087
##              perc        1.2%    2.1%    3.3%
##              p.row      35.7%   64.3%       .
##              p.col       2.5%    3.9%       .
##                                              
## GA           freq           2       5       7
##              perc        0.0%    0.0%    0.0%
##              p.row      28.6%   71.4%       .
##              p.col       0.0%    0.0%       .
##                                              
## GB           freq      20'031  15'808  35'839
##              perc        4.7%    3.7%    8.3%
##              p.row      55.9%   44.1%       .
##              p.col      10.1%    6.8%       .
##                                              
## GD           freq           3       5       8
##              perc        0.0%    0.0%    0.0%
##              p.row      37.5%   62.5%       .
##              p.col       0.0%    0.0%       .
##                                              
## GE           freq          44      27      71
##              perc        0.0%    0.0%    0.0%
##              p.row      62.0%   38.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## GF           freq           1       5       6
##              perc        0.0%    0.0%    0.0%
##              p.row      16.7%   83.3%       .
##              p.col       0.0%    0.0%       .
##                                              
## GH           freq         133      99     232
##              perc        0.0%    0.0%    0.1%
##              p.row      57.3%   42.7%       .
##              p.col       0.1%    0.0%       .
##                                              
## GL           freq           0       4       4
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## GM           freq           1       2       3
##              perc        0.0%    0.0%    0.0%
##              p.row      33.3%   66.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## GN           freq           6       2       8
##              perc        0.0%    0.0%    0.0%
##              p.row      75.0%   25.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## GP           freq           0       0       0
##              perc        0.0%    0.0%    0.0%
##              p.row    NA      NA            .
##              p.col       0.0%    0.0%       .
##                                              
## GQ           freq           0       2       2
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## GR           freq       1'539   2'543   4'082
##              perc        0.4%    0.6%    0.9%
##              p.row      37.7%   62.3%       .
##              p.col       0.8%    1.1%       .
##                                              
## GT           freq           7       5      12
##              perc        0.0%    0.0%    0.0%
##              p.row      58.3%   41.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## GU           freq           2       1       3
##              perc        0.0%    0.0%    0.0%
##              p.row      66.7%   33.3%       .
##              p.col       0.0%    0.0%       .
##                                              
## GW           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## GY           freq           2       1       3
##              perc        0.0%    0.0%    0.0%
##              p.row      66.7%   33.3%       .
##              p.col       0.0%    0.0%       .
##                                              
## HK           freq       1'295   1'195   2'490
##              perc        0.3%    0.3%    0.6%
##              p.row      52.0%   48.0%       .
##              p.col       0.7%    0.5%       .
##                                              
## HN           freq           3       2       5
##              perc        0.0%    0.0%    0.0%
##              p.row      60.0%   40.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## HR           freq         187     257     444
##              perc        0.0%    0.1%    0.1%
##              p.row      42.1%   57.9%       .
##              p.col       0.1%    0.1%       .
##                                              
## HT           freq           3       2       5
##              perc        0.0%    0.0%    0.0%
##              p.row      60.0%   40.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## HU           freq         472     742   1'214
##              perc        0.1%    0.2%    0.3%
##              p.row      38.9%   61.1%       .
##              p.col       0.2%    0.3%       .
##                                              
## ID           freq         167     136     303
##              perc        0.0%    0.0%    0.1%
##              p.row      55.1%   44.9%       .
##              p.col       0.1%    0.1%       .
##                                              
## IE           freq         999   1'116   2'115
##              perc        0.2%    0.3%    0.5%
##              p.row      47.2%   52.8%       .
##              p.col       0.5%    0.5%       .
##                                              
## IL           freq       1'254   1'815   3'069
##              perc        0.3%    0.4%    0.7%
##              p.row      40.9%   59.1%       .
##              p.col       0.6%    0.8%       .
##                                              
## IN           freq       4'984   4'137   9'121
##              perc        1.2%    1.0%    2.1%
##              p.row      54.6%   45.4%       .
##              p.col       2.5%    1.8%       .
##                                              
## IQ           freq          69      19      88
##              perc        0.0%    0.0%    0.0%
##              p.row      78.4%   21.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## IR           freq         260     610     870
##              perc        0.1%    0.1%    0.2%
##              p.row      29.9%   70.1%       .
##              p.col       0.1%    0.3%       .
##                                              
## IS           freq          68      58     126
##              perc        0.0%    0.0%    0.0%
##              p.row      54.0%   46.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## IT           freq       6'902  23'476  30'378
##              perc        1.6%    5.5%    7.1%
##              p.row      22.7%   77.3%       .
##              p.col       3.5%   10.1%       .
##                                              
## JE           freq           4       3       7
##              perc        0.0%    0.0%    0.0%
##              p.row      57.1%   42.9%       .
##              p.col       0.0%    0.0%       .
##                                              
## JM           freq          17      12      29
##              perc        0.0%    0.0%    0.0%
##              p.row      58.6%   41.4%       .
##              p.col       0.0%    0.0%       .
##                                              
## JO           freq         132      76     208
##              perc        0.0%    0.0%    0.0%
##              p.row      63.5%   36.5%       .
##              p.col       0.1%    0.0%       .
##                                              
## JP           freq       4'006   6'632  10'638
##              perc        0.9%    1.5%    2.5%
##              p.row      37.7%   62.3%       .
##              p.col       2.0%    2.8%       .
##                                              
## KE           freq         154     127     281
##              perc        0.0%    0.0%    0.1%
##              p.row      54.8%   45.2%       .
##              p.col       0.1%    0.1%       .
##                                              
## KH           freq           1      11      12
##              perc        0.0%    0.0%    0.0%
##              p.row       8.3%   91.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## KI           freq           0       3       3
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## KP           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## KR           freq       1'708   3'068   4'776
##              perc        0.4%    0.7%    1.1%
##              p.row      35.8%   64.2%       .
##              p.col       0.9%    1.3%       .
##                                              
## KW           freq          97      94     191
##              perc        0.0%    0.0%    0.0%
##              p.row      50.8%   49.2%       .
##              p.col       0.0%    0.0%       .
##                                              
## KZ           freq          52      36      88
##              perc        0.0%    0.0%    0.0%
##              p.row      59.1%   40.9%       .
##              p.col       0.0%    0.0%       .
##                                              
## LA           freq           0       2       2
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## LB           freq         134     180     314
##              perc        0.0%    0.0%    0.1%
##              p.row      42.7%   57.3%       .
##              p.col       0.1%    0.1%       .
##                                              
## LC           freq           0       0       0
##              perc        0.0%    0.0%    0.0%
##              p.row    NA      NA            .
##              p.col       0.0%    0.0%       .
##                                              
## LI           freq           3       5       8
##              perc        0.0%    0.0%    0.0%
##              p.row      37.5%   62.5%       .
##              p.col       0.0%    0.0%       .
##                                              
## LK           freq          93      65     158
##              perc        0.0%    0.0%    0.0%
##              p.row      58.9%   41.1%       .
##              p.col       0.0%    0.0%       .
##                                              
## LR           freq           1       1       2
##              perc        0.0%    0.0%    0.0%
##              p.row      50.0%   50.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## LS           freq           0       1       1
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## LT           freq          86     139     225
##              perc        0.0%    0.0%    0.1%
##              p.row      38.2%   61.8%       .
##              p.col       0.0%    0.1%       .
##                                              
## LU           freq          79     145     224
##              perc        0.0%    0.0%    0.1%
##              p.row      35.3%   64.7%       .
##              p.col       0.0%    0.1%       .
##                                              
## LV           freq          32      50      82
##              perc        0.0%    0.0%    0.0%
##              p.row      39.0%   61.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## LY           freq          11       5      16
##              perc        0.0%    0.0%    0.0%
##              p.row      68.8%   31.2%       .
##              p.col       0.0%    0.0%       .
##                                              
## MA           freq         122     100     222
##              perc        0.0%    0.0%    0.1%
##              p.row      55.0%   45.0%       .
##              p.col       0.1%    0.0%       .
##                                              
## MC           freq          12      15      27
##              perc        0.0%    0.0%    0.0%
##              p.row      44.4%   55.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## MD           freq           3       7      10
##              perc        0.0%    0.0%    0.0%
##              p.row      30.0%   70.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## ME           freq          10      11      21
##              perc        0.0%    0.0%    0.0%
##              p.row      47.6%   52.4%       .
##              p.col       0.0%    0.0%       .
##                                              
## MG           freq           1       3       4
##              perc        0.0%    0.0%    0.0%
##              p.row      25.0%   75.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## MK           freq           7       5      12
##              perc        0.0%    0.0%    0.0%
##              p.row      58.3%   41.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## ML           freq           4       5       9
##              perc        0.0%    0.0%    0.0%
##              p.row      44.4%   55.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## MM           freq           3       4       7
##              perc        0.0%    0.0%    0.0%
##              p.row      42.9%   57.1%       .
##              p.col       0.0%    0.0%       .
##                                              
## MN           freq          10       2      12
##              perc        0.0%    0.0%    0.0%
##              p.row      83.3%   16.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## MO           freq          79      37     116
##              perc        0.0%    0.0%    0.0%
##              p.row      68.1%   31.9%       .
##              p.col       0.0%    0.0%       .
##                                              
## MQ           freq           0       0       0
##              perc        0.0%    0.0%    0.0%
##              p.row    NA      NA            .
##              p.col       0.0%    0.0%       .
##                                              
## MR           freq           0       1       1
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## MT           freq          50      57     107
##              perc        0.0%    0.0%    0.0%
##              p.row      46.7%   53.3%       .
##              p.col       0.0%    0.0%       .
##                                              
## MU           freq          19      15      34
##              perc        0.0%    0.0%    0.0%
##              p.row      55.9%   44.1%       .
##              p.col       0.0%    0.0%       .
##                                              
## MV           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## MW           freq           8      14      22
##              perc        0.0%    0.0%    0.0%
##              p.row      36.4%   63.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## MX           freq         587   1'620   2'207
##              perc        0.1%    0.4%    0.5%
##              p.row      26.6%   73.4%       .
##              p.col       0.3%    0.7%       .
##                                              
## MY           freq         828     749   1'577
##              perc        0.2%    0.2%    0.4%
##              p.row      52.5%   47.5%       .
##              p.col       0.4%    0.3%       .
##                                              
## MZ           freq           6       4      10
##              perc        0.0%    0.0%    0.0%
##              p.row      60.0%   40.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## NA           freq           9      10      19
##              perc        0.0%    0.0%    0.0%
##              p.row      47.4%   52.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## NC           freq           0       3       3
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## NE           freq           1       3       4
##              perc        0.0%    0.0%    0.0%
##              p.row      25.0%   75.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## NG           freq         191     210     401
##              perc        0.0%    0.0%    0.1%
##              p.row      47.6%   52.4%       .
##              p.col       0.1%    0.1%       .
##                                              
## NI           freq           5       4       9
##              perc        0.0%    0.0%    0.0%
##              p.row      55.6%   44.4%       .
##              p.col       0.0%    0.0%       .
##                                              
## NL           freq       3'652   3'853   7'505
##              perc        0.8%    0.9%    1.7%
##              p.row      48.7%   51.3%       .
##              p.col       1.8%    1.7%       .
##                                              
## NO           freq       1'058   1'329   2'387
##              perc        0.2%    0.3%    0.6%
##              p.row      44.3%   55.7%       .
##              p.col       0.5%    0.6%       .
##                                              
## NP           freq          34      36      70
##              perc        0.0%    0.0%    0.0%
##              p.row      48.6%   51.4%       .
##              p.col       0.0%    0.0%       .
##                                              
## NZ           freq       1'698   1'145   2'843
##              perc        0.4%    0.3%    0.7%
##              p.row      59.7%   40.3%       .
##              p.col       0.9%    0.5%       .
##                                              
## OM           freq          82      57     139
##              perc        0.0%    0.0%    0.0%
##              p.row      59.0%   41.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## PA           freq           9      19      28
##              perc        0.0%    0.0%    0.0%
##              p.row      32.1%   67.9%       .
##              p.col       0.0%    0.0%       .
##                                              
## PE           freq          38      96     134
##              perc        0.0%    0.0%    0.0%
##              p.row      28.4%   71.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## PF           freq           2       4       6
##              perc        0.0%    0.0%    0.0%
##              p.row      33.3%   66.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## PH           freq         116      81     197
##              perc        0.0%    0.0%    0.0%
##              p.row      58.9%   41.1%       .
##              p.col       0.1%    0.0%       .
##                                              
## PK           freq         300     497     797
##              perc        0.1%    0.1%    0.2%
##              p.row      37.6%   62.4%       .
##              p.col       0.2%    0.2%       .
##                                              
## PL           freq         964   1'783   2'747
##              perc        0.2%    0.4%    0.6%
##              p.row      35.1%   64.9%       .
##              p.col       0.5%    0.8%       .
##                                              
## PR           freq          30      49      79
##              perc        0.0%    0.0%    0.0%
##              p.row      38.0%   62.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## PT           freq       1'336   3'081   4'417
##              perc        0.3%    0.7%    1.0%
##              p.row      30.2%   69.8%       .
##              p.col       0.7%    1.3%       .
##                                              
## PY           freq           8      12      20
##              perc        0.0%    0.0%    0.0%
##              p.row      40.0%   60.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## QA           freq         156     238     394
##              perc        0.0%    0.1%    0.1%
##              p.row      39.6%   60.4%       .
##              p.col       0.1%    0.1%       .
##                                              
## RE           freq           3       2       5
##              perc        0.0%    0.0%    0.0%
##              p.row      60.0%   40.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## RO           freq         387     539     926
##              perc        0.1%    0.1%    0.2%
##              p.row      41.8%   58.2%       .
##              p.col       0.2%    0.2%       .
##                                              
## RS           freq         187     349     536
##              perc        0.0%    0.1%    0.1%
##              p.row      34.9%   65.1%       .
##              p.col       0.1%    0.1%       .
##                                              
## RU           freq       2'618   1'006   3'624
##              perc        0.6%    0.2%    0.8%
##              p.row      72.2%   27.8%       .
##              p.col       1.3%    0.4%       .
##                                              
## RW           freq           7       7      14
##              perc        0.0%    0.0%    0.0%
##              p.row      50.0%   50.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## SA           freq         443     703   1'146
##              perc        0.1%    0.2%    0.3%
##              p.row      38.7%   61.3%       .
##              p.col       0.2%    0.3%       .
##                                              
## SD           freq          13      11      24
##              perc        0.0%    0.0%    0.0%
##              p.row      54.2%   45.8%       .
##              p.col       0.0%    0.0%       .
##                                              
## SE           freq       2'125   2'439   4'564
##              perc        0.5%    0.6%    1.1%
##              p.row      46.6%   53.4%       .
##              p.col       1.1%    1.0%       .
##                                              
## SG           freq       1'400   1'464   2'864
##              perc        0.3%    0.3%    0.7%
##              p.row      48.9%   51.1%       .
##              p.col       0.7%    0.6%       .
##                                              
## SH           freq           0       1       1
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## SI           freq         316     382     698
##              perc        0.1%    0.1%    0.2%
##              p.row      45.3%   54.7%       .
##              p.col       0.2%    0.2%       .
##                                              
## SK           freq         115     174     289
##              perc        0.0%    0.0%    0.1%
##              p.row      39.8%   60.2%       .
##              p.col       0.1%    0.1%       .
##                                              
## SL           freq           2       3       5
##              perc        0.0%    0.0%    0.0%
##              p.row      40.0%   60.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## SM           freq           0       3       3
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## SN           freq          25       9      34
##              perc        0.0%    0.0%    0.0%
##              p.row      73.5%   26.5%       .
##              p.col       0.0%    0.0%       .
##                                              
## SO           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## SS           freq           0       1       1
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## SV           freq           1       1       2
##              perc        0.0%    0.0%    0.0%
##              p.row      50.0%   50.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## SY           freq           3      13      16
##              perc        0.0%    0.0%    0.0%
##              p.row      18.8%   81.2%       .
##              p.col       0.0%    0.0%       .
##                                              
## SZ           freq           1       3       4
##              perc        0.0%    0.0%    0.0%
##              p.row      25.0%   75.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## TD           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## TG           freq           0       2       2
##              perc        0.0%    0.0%    0.0%
##              p.row       0.0%  100.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## TH           freq         389     367     756
##              perc        0.1%    0.1%    0.2%
##              p.row      51.5%   48.5%       .
##              p.col       0.2%    0.2%       .
##                                              
## TJ           freq           1       1       2
##              perc        0.0%    0.0%    0.0%
##              p.row      50.0%   50.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## TM           freq           6       1       7
##              perc        0.0%    0.0%    0.0%
##              p.row      85.7%   14.3%       .
##              p.col       0.0%    0.0%       .
##                                              
## TN           freq         134     109     243
##              perc        0.0%    0.0%    0.1%
##              p.row      55.1%   44.9%       .
##              p.col       0.1%    0.0%       .
##                                              
## TR           freq       1'492   1'396   2'888
##              perc        0.3%    0.3%    0.7%
##              p.row      51.7%   48.3%       .
##              p.col       0.8%    0.6%       .
##                                              
## TT           freq          16      14      30
##              perc        0.0%    0.0%    0.0%
##              p.row      53.3%   46.7%       .
##              p.col       0.0%    0.0%       .
##                                              
## TW           freq       1'429   2'146   3'575
##              perc        0.3%    0.5%    0.8%
##              p.row      40.0%   60.0%       .
##              p.col       0.7%    0.9%       .
##                                              
## TZ           freq          22      40      62
##              perc        0.0%    0.0%    0.0%
##              p.row      35.5%   64.5%       .
##              p.col       0.0%    0.0%       .
##                                              
## UA           freq          98      94     192
##              perc        0.0%    0.0%    0.0%
##              p.row      51.0%   49.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## UG           freq          34      62      96
##              perc        0.0%    0.0%    0.0%
##              p.row      35.4%   64.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## UK           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## US           freq      63'535  57'402 120'937
##              perc       14.8%   13.3%   28.1%
##              p.row      52.5%   47.5%       .
##              p.col      32.2%   24.6%       .
##                                              
## UY           freq          83     126     209
##              perc        0.0%    0.0%    0.0%
##              p.row      39.7%   60.3%       .
##              p.col       0.0%    0.1%       .
##                                              
## UZ           freq           9       3      12
##              perc        0.0%    0.0%    0.0%
##              p.row      75.0%   25.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## VE           freq          24      34      58
##              perc        0.0%    0.0%    0.0%
##              p.row      41.4%   58.6%       .
##              p.col       0.0%    0.0%       .
##                                              
## VN           freq         148     128     276
##              perc        0.0%    0.0%    0.1%
##              p.row      53.6%   46.4%       .
##              p.col       0.1%    0.1%       .
##                                              
## WS           freq           1       0       1
##              perc        0.0%    0.0%    0.0%
##              p.row     100.0%    0.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## XK           freq           8       1       9
##              perc        0.0%    0.0%    0.0%
##              p.row      88.9%   11.1%       .
##              p.col       0.0%    0.0%       .
##                                              
## YE           freq           6       6      12
##              perc        0.0%    0.0%    0.0%
##              p.row      50.0%   50.0%       .
##              p.col       0.0%    0.0%       .
##                                              
## ZA           freq       1'152     854   2'006
##              perc        0.3%    0.2%    0.5%
##              p.row      57.4%   42.6%       .
##              p.col       0.6%    0.4%       .
##                                              
## ZM           freq          15      25      40
##              perc        0.0%    0.0%    0.0%
##              p.row      37.5%   62.5%       .
##              p.col       0.0%    0.0%       .
##                                              
## ZW           freq          32      13      45
##              perc        0.0%    0.0%    0.0%
##              p.row      71.1%   28.9%       .
##              p.col       0.0%    0.0%       .
##                                              
## Sum          freq     197'612 232'941 430'553
##              perc       45.9%   54.1%  100.0%
##              p.row          .       .       .
##              p.col          .       .       .
## 

## ------------------------------------------------------------------------------ 
## FAC_ROLE ~ IND_OPEN (.)
## 
## Summary: 
## n: 424'264, rows: 3, columns: 2
## 
## Pearson's Chi-squared test:
##   X-squared = 4915.6, df = 2, p-value < 2.2e-16
## Log likelihood ratio (G-test) test of independence:
##   G = 5387.8, X-squared df = 2, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
##   X-squared = 2676.9, df = 1, p-value < 2.2e-16
## 
## Phi-Coefficient        0.108
## Contingency Coeff.     0.107
## Cramer's V             0.108
## 
##                                            
##            IND_OPEN   FALSE    TRUE     Sum
## FAC_ROLE                                   
##                                            
## review     freq     123'769 163'656 287'425
##            perc       29.2%   38.6%   67.7%
##            p.row      43.1%   56.9%       .
##            p.col      64.5%   70.4%       .
##                                            
## editor     freq      63'957  68'368 132'325
##            perc       15.1%   16.1%   31.2%
##            p.row      48.3%   51.7%       .
##            p.col      33.3%   29.4%       .
##                                            
## chief      freq       4'118     396   4'514
##            perc        1.0%    0.1%    1.1%
##            p.row      91.2%    8.8%       .
##            p.col       2.1%    0.2%       .
##                                            
## Sum        freq     191'844 232'420 424'264
##            perc       45.2%   54.8%  100.0%
##            p.row          .       .       .
##            p.col          .       .       .
## 

## ------------------------------------------------------------------------------ 
## IND_MALE ~ IND_OPEN (.)
## 
## Summary: 
## n: 284'342, rows: 2, columns: 2
## 
## Pearson's Chi-squared test (cont. adj):
##   X-squared = 0.50722, df = 1, p-value = 0.4763
## Fisher's exact test p-value = 0.4752
## McNemar's chi-squared = 4734.1, df = 1, p-value < 2.2e-16
## 
##                     estimate lwr.ci upr.ci'
##                                           
## odds ratio             1.006  0.990  1.022
## rel. risk (col1)       1.003  0.994  1.013
## rel. risk (col2)       0.998  0.991  1.004
## 
## 
## Phi-Coefficient        0.001
## Contingency Coeff.     0.001
## Cramer's V             0.001
## 
##                                            
##            IND_OPEN   FALSE    TRUE     Sum
## IND_MALE                                   
##                                            
## FALSE      freq      37'634  53'907  91'541
##            perc       13.2%   19.0%   32.2%
##            p.row      41.1%   58.9%       .
##            p.col      32.3%   32.1%       .
##                                            
## TRUE       freq      78'991 113'810 192'801
##            perc       27.8%   40.0%   67.8%
##            p.row      41.0%   59.0%       .
##            p.col      67.7%   67.9%       .
##                                            
## Sum        freq     116'625 167'717 284'342
##            perc       41.0%   59.0%  100.0%
##            p.row          .       .       .
##            p.col          .       .       .
##                                            
## 
## ----------
## ' 95% conf. level

# Describe IND_OPEN, FAC_ROLE, and the IND_OPEN %in% FAC_ROLE term, each
# against IND_MALE. `data = .` keeps the piped table as the data argument.
editors_analysis.tbl %>%
  Desc(
    IND_OPEN + FAC_ROLE + IND_OPEN %in% FAC_ROLE ~ IND_MALE,
    data = .
  )
## ------------------------------------------------------------------------------ 
## IND_OPEN ~ IND_MALE (.)
## 
## Summary: 
## n: 284'342, rows: 2, columns: 2
## 
## Pearson's Chi-squared test (cont. adj):
##   X-squared = 0.50722, df = 1, p-value = 0.4763
## Fisher's exact test p-value = 0.4752
## McNemar's chi-squared = 4734.1, df = 1, p-value < 2.2e-16
## 
##                     estimate lwr.ci upr.ci'
##                                           
## odds ratio             1.006  0.990  1.022
## rel. risk (col1)       1.004  0.993  1.015
## rel. risk (col2)       0.998  0.993  1.003
## 
## 
## Phi-Coefficient        0.001
## Contingency Coeff.     0.001
## Cramer's V             0.001
## 
##                                            
##            IND_MALE   FALSE    TRUE     Sum
## IND_OPEN                                   
##                                            
## FALSE      freq      37'634  78'991 116'625
##            perc       13.2%   27.8%   41.0%
##            p.row      32.3%   67.7%       .
##            p.col      41.1%   41.0%       .
##                                            
## TRUE       freq      53'907 113'810 167'717
##            perc       19.0%   40.0%   59.0%
##            p.row      32.1%   67.9%       .
##            p.col      58.9%   59.0%       .
##                                            
## Sum        freq      91'541 192'801 284'342
##            perc       32.2%   67.8%  100.0%
##            p.row          .       .       .
##            p.col          .       .       .
##                                            
## 
## ----------
## ' 95% conf. level

## ------------------------------------------------------------------------------ 
## FAC_ROLE ~ IND_MALE (.)
## 
## Summary: 
## n: 308'623, rows: 3, columns: 2
## 
## Pearson's Chi-squared test:
##   X-squared = 320.94, df = 2, p-value < 2.2e-16
## Log likelihood ratio (G-test) test of independence:
##   G = 340.35, X-squared df = 2, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
##   X-squared = 9.7643, df = 1, p-value = 0.001779
## 
## Phi-Coefficient        0.032
## Contingency Coeff.     0.032
## Cramer's V             0.032
## 
##                                            
##            IND_MALE   FALSE    TRUE     Sum
## FAC_ROLE                                   
##                                            
## review     freq      65'119 142'002 207'121
##            perc       21.1%   46.0%   67.1%
##            p.row      31.4%   68.6%       .
##            p.col      66.3%   67.5%       .
##                                            
## editor     freq      32'411  65'417  97'828
##            perc       10.5%   21.2%   31.7%
##            p.row      33.1%   66.9%       .
##            p.col      33.0%   31.1%       .
##                                            
## chief      freq         741   2'933   3'674
##            perc        0.2%    1.0%    1.2%
##            p.row      20.2%   79.8%       .
##            p.col       0.8%    1.4%       .
##                                            
## Sum        freq      98'271 210'352 308'623
##            perc       31.8%   68.2%  100.0%
##            p.row          .       .       .
##            p.col          .       .       .
## 

## ------------------------------------------------------------------------------ 
## IND_OPEN:FAC_ROLE ~ IND_MALE (.)
## 
## Summary: 
## n: 280'402, rows: 6, columns: 2
## 
## Pearson's Chi-squared test:
##   X-squared = 263.08, df = 5, p-value < 2.2e-16
## Log likelihood ratio (G-test) test of independence:
##   G = 278.21, X-squared df = 5, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
##   X-squared = 9.1472, df = 1, p-value = 0.002491
## 
## Phi-Coefficient        0.031
## Contingency Coeff.     0.031
## Cramer's V             0.031
## 
##                                                     
##                     IND_MALE   FALSE    TRUE     Sum
## IND_OPEN:FAC_ROLE                                   
##                                                     
## FALSE:review        freq      22'391  48'416  70'807
##                     perc        8.0%   17.3%   25.3%
##                     p.row      31.6%   68.4%       .
##                     p.col      24.8%   25.5%       .
##                                                     
## TRUE:review         freq      37'430  79'909 117'339
##                     perc       13.3%   28.5%   41.8%
##                     p.row      31.9%   68.1%       .
##                     p.col      41.5%   42.0%       .
##                                                     
## FALSE:editor        freq      13'595  26'321  39'916
##                     perc        4.8%    9.4%   14.2%
##                     p.row      34.1%   65.9%       .
##                     p.col      15.1%   13.8%       .
##                                                     
## TRUE:editor         freq      16'324  33'438  49'762
##                     perc        5.8%   11.9%   17.7%
##                     p.row      32.8%   67.2%       .
##                     p.col      18.1%   17.6%       .
##                                                     
## FALSE:chief         freq         457   1'853   2'310
##                     perc        0.2%    0.7%    0.8%
##                     p.row      19.8%   80.2%       .
##                     p.col       0.5%    1.0%       .
##                                                     
## TRUE:chief          freq          59     209     268
##                     perc        0.0%    0.1%    0.1%
##                     p.row      22.0%   78.0%       .
##                     p.col       0.1%    0.1%       .
##                                                     
## Sum                 freq      90'256 190'146 280'402
##                     perc       32.2%   67.8%  100.0%
##                     p.row          .       .       .
##                     p.col          .       .       .
## 

# World map shaded by the number of editor records per country code.
world.sf <- ne_countries(scale = "medium", returnclass = "sf")

# Todo: track country code non-matches "RE" "UK" "GP" "MQ" "GF" "XK" "AN"
# One row per LS_COUNTRY value with its record count; the column is renamed
# to iso_a2 so it lines up with the key used in the join below.
ctry_totals <- editors_analysis.tbl %>%
  group_by(LS_COUNTRY) %>%
  summarise(n_editors = n()) %>%
  rename(iso_a2 = LS_COUNTRY)

# Name the join key explicitly: this avoids the "Joining, by = ..." message
# and prevents a silent change of key if either table later gains another
# column with a shared name.
world.sf %<>% left_join(ctry_totals, by = "iso_a2")

# Fill uses a log scale; map rows without a matching country code have
# n_editors = NA after the left join.
ggplot(data = world.sf) +
  geom_sf(aes(fill = n_editors)) +
  scale_fill_viridis_c(option = "E", trans = "log")

# Univariate description of every remaining journal-board column. The journal
# name and the two LIST_* columns are dropped before describing.
journal_board_analysis.tbl %>%
  select(-c(NM_JOURNAL, LIST_ROLEGROUP_COUNTRIES, LIST_SUBJECTS)) %>%
  Desc(~ ., data = .)
## ------------------------------------------------------------------------------ 
## .$FAC_ROLE (ordered)
## 
##   length      n    NAs unique levels  dupes
##   15'314 14'191  1'123      3      3      y
##           92.7%   7.3%                     
## 
##     level   freq   perc  cumfreq  cumperc
## 1  review  5'129  36.1%    5'129    36.1%
## 2  editor  5'454  38.4%   10'583    74.6%
## 3   chief  3'608  25.4%   14'191   100.0%

## ------------------------------------------------------------------------------ 
## .$CAT_PUBLISHER (character)
## 
##   length      n    NAs unique levels  dupes
##   15'314 15'314      0     17     17      y
##          100.0%   0.0%                     
## 
##                          level   freq   perc  cumfreq  cumperc
## 1                     Elsevier  5'814  38.0%    5'814    38.0%
## 2                         SAGE  2'890  18.9%    8'704    56.8%
## 3                 Inderscience  1'310   8.6%   10'014    65.4%
## 4                      Emerald  1'289   8.4%   11'303    73.8%
## 5   Cambridge University Press    879   5.7%   12'182    79.5%
## 6                        Brill    629   4.1%   12'811    83.7%
## 7                         MDPI    536   3.5%   13'347    87.2%
## 8                   IGI Global    406   2.7%   13'753    89.8%
## 9                     Pleiades    344   2.2%   14'097    92.1%
## 10                     Hindawi    302   2.0%   14'399    94.0%
## 11                      Karger    257   1.7%   14'656    95.7%
## 12              John Benjamins    203   1.3%   14'859    97.0%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## .$N_SUBJECTS (numeric)
## 
##   length      n    NAs  unique    0s  mean  meanCI'
##   15'314  9'339  5'975       4     0  1.95    1.93
##           61.0%  39.0%          0.0%          1.97
##                                                   
##      .05    .10    .25  median   .75   .90     .95
##     1.00   1.00   1.00    2.00  3.00  3.00    3.00
##                                                   
##    range     sd  vcoef     mad   IQR  skew    kurt
##     3.00   0.87   0.45    1.48  2.00  0.35   -1.04
##                                                   
## 
##    level   freq   perc  cumfreq  cumperc
## 1      1  3'541  37.9%    3'541    37.9%
## 2      2  3'000  32.1%    6'541    70.0%
## 3      3  2'544  27.2%    9'085    97.3%
## 4      4    254   2.7%    9'339   100.0%
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## .$IND_OPEN (logical)
## 
##   length      n    NAs unique
##   15'314 12'038  3'276      2
##           78.6%  21.4%       
## 
##          freq   perc  lci.95  uci.95'
## FALSE  10'380  86.2%   85.6%   86.8%
## TRUE    1'658  13.8%   13.2%   14.4%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## .$N_ROLEGROUP_COUNTRIES (integer)
## 
##   length       n    NAs  unique     0s   mean  meanCI'
##   15'314  15'314      0      85      0   6.66    6.52
##           100.0%   0.0%           0.0%           6.79
##                                                      
##      .05     .10    .25  median    .75    .90     .95
##     1.00    1.00   1.00    3.00  10.00  17.00   21.00
##                                                      
##    range      sd  vcoef     mad    IQR   skew    kurt
##   105.00    8.48   1.27    2.97   9.00   3.30   18.65
##                                                      
## lowest : 1 (5'122), 2 (1'924), 3 (1'042), 4 (743), 5 (639)
## highest: 89, 90 (2), 91, 100, 106
## 
## heap(?): remarkable frequency (33.4%) for the mode(s) (= 1)
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## .$PERCENT_ROLEGROUP_MALE (numeric)
## 
##      length          n        NAs     unique         0s        mean     meanCI'
##      15'314     12'210      3'104        986      1'082   0.7005036  0.6951806
##                  79.7%      20.3%                  7.1%              0.7058267
##                                                                               
##         .05        .10        .25     median        .75         .90        .95
##   0.0000000  0.2500000  0.5384615  0.7500000  1.0000000   1.0000000  1.0000000
##                                                                               
##       range         sd      vcoef        mad        IQR        skew       kurt
##   1.0000000  0.3000711  0.4283648  0.3706500  0.4615385  -0.9913193  0.1952624
##                                                                               
## lowest : 0.0 (1'082), 0.0526316, 0.0666667, 0.0833333 (2), 0.0909091
## highest: 0.9722222, 0.9736842, 0.975, 0.9787234, 1.0 (3'673)
## 
## heap(?): remarkable frequency (30.1%) for the mode(s) (= 1)
## 
## ' 95%-CI (classic)

# Describe IND_OPEN against each of the three numeric board measures,
# using all rows of the journal-board table.
journal_board_analysis.tbl %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
## 
## Summary: 
## n pairs: 15'314, valid: 12'038 (78.6%), missings: 3'276 (21.4%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      6.253   12.936
## median    3.000    7.000
## sd        6.816   16.347
## IQR       9.000   15.000
## n        10'380    1'658
## np      86.227%  13.773%
## NAs           0        0
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 245.66, df = 1, p-value < 2.2e-16
## 
## 
## Warning:
##   Grouping variable contains 3276 NAs (21.4%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##        
##               1   (1,3]   (3,11]   (11,106]
##   FALSE   89.7%   90.2%    86.7%      77.5%
##   TRUE    10.3%    9.8%    13.3%      22.5%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
## 
## Summary: 
## n pairs: 15'314, valid: 9'470 (61.8%), missings: 5'844 (38.2%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      0.687    0.738
## median    0.750    0.772
## sd        0.308    0.251
## IQR       0.500    0.321
## n         7'927    1'543
## np      83.706%  16.294%
## NAs       2'453      115
## 0s          763       84
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 17.183, df = 1, p-value = 3.395e-05
## 
## 
## Warning:
##   Grouping variable contains 3276 NAs (21.4%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##        
##           [0,0.533]   (0.533,0.75]   (0.75,1)       1
##   FALSE       90.0%          80.6%      76.3%   86.1%
##   TRUE        10.0%          19.4%      23.7%   13.9%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_SUBJECTS (.)
## 
## Summary: 
## n pairs: 15'314, valid: 9'339 (61.0%), missings: 5'975 (39.0%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      1.954    1.885
## median    2.000    2.000
## sd        0.871    0.858
## IQR       2.000    2.000
## n         8'528      811
## np      91.316%   8.684%
## NAs       1'852      847
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 4.6992, df = 1, p-value = 0.03018
## 
## 
## Warning:
##   Grouping variable contains 3276 NAs (21.4%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##        
##               1   (1,2]   (2,3]   (3,4]
##   FALSE   90.7%   91.0%   92.5%   91.3%
##   TRUE     9.3%    9.0%    7.5%    8.7%

# Describe IND_OPEN against each of the three numeric board measures,
# using all rows of the journal-board table.
# NOTE(review): this call is identical to the Desc() call that precedes it in
# the document -- confirm whether a different breakdown was intended here.
journal_board_analysis.tbl %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
## 
## Summary: 
## n pairs: 15'314, valid: 12'038 (78.6%), missings: 3'276 (21.4%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      6.253   12.936
## median    3.000    7.000
## sd        6.816   16.347
## IQR       9.000   15.000
## n        10'380    1'658
## np      86.227%  13.773%
## NAs           0        0
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 245.66, df = 1, p-value < 2.2e-16
## 
## 
## Warning:
##   Grouping variable contains 3276 NAs (21.4%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##        
##               1   (1,3]   (3,11]   (11,106]
##   FALSE   89.7%   90.2%    86.7%      77.5%
##   TRUE    10.3%    9.8%    13.3%      22.5%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
## 
## Summary: 
## n pairs: 15'314, valid: 9'470 (61.8%), missings: 5'844 (38.2%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      0.687    0.738
## median    0.750    0.772
## sd        0.308    0.251
## IQR       0.500    0.321
## n         7'927    1'543
## np      83.706%  16.294%
## NAs       2'453      115
## 0s          763       84
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 17.183, df = 1, p-value = 3.395e-05
## 
## 
## Warning:
##   Grouping variable contains 3276 NAs (21.4%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##        
##           [0,0.533]   (0.533,0.75]   (0.75,1)       1
##   FALSE       90.0%          80.6%      76.3%   86.1%
##   TRUE        10.0%          19.4%      23.7%   13.9%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_SUBJECTS (.)
## 
## Summary: 
## n pairs: 15'314, valid: 9'339 (61.0%), missings: 5'975 (39.0%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      1.954    1.885
## median    2.000    2.000
## sd        0.871    0.858
## IQR       2.000    2.000
## n         8'528      811
## np      91.316%   8.684%
## NAs       1'852      847
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 4.6992, df = 1, p-value = 0.03018
## 
## 
## Warning:
##   Grouping variable contains 3276 NAs (21.4%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##        
##               1   (1,2]   (2,3]   (3,4]
##   FALSE   90.7%   91.0%   92.5%   91.3%
##   TRUE     9.3%    9.0%    7.5%    8.7%

# Describe IND_OPEN against the three numeric board measures, using only the
# rows where FAC_ROLE is "chief".
journal_board_analysis.tbl %>%
  filter(FAC_ROLE == "chief") %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
## 
## Summary: 
## n pairs: 3'608, valid: 2'636 (73.1%), missings: 972 (26.9%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      1.330    1.389
## median    1.000    1.000
## sd        0.796    0.996
## IQR       0.000    0.000
## n         2'420      216
## np      91.806%   8.194%
## NAs           0        0
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 0.13217, df = 1, p-value = 0.7162
## 
## 
## Warning:
##   Grouping variable contains 972 NAs (26.9%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##        
##               1   (1,10]
##   FALSE   91.9%    91.5%
##   TRUE     8.1%     8.5%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
## 
## Summary: 
## n pairs: 3'608, valid: 1'625 (45.0%), missings: 1'983 (55.0%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      0.803    0.786
## median    1.000    1.000
## sd        0.362    0.381
## IQR       0.250    0.271
## n         1'453      172
## np      89.415%  10.585%
## NAs         967       44
## 0s          212       29
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 0.16665, df = 1, p-value = 0.6831
## 
## 
## Warning:
##   Grouping variable contains 972 NAs (26.9%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##        
##           [0,0.75]   (0.75,1)       1
##   FALSE      89.1%      87.5%   89.6%
##   TRUE       10.9%      12.5%   10.4%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_SUBJECTS (.)
## 
## Summary: 
## n pairs: 3'608, valid: 2'184 (60.5%), missings: 1'424 (39.5%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      1.963    1.856
## median    2.000    2.000
## sd        0.873    0.849
## IQR       2.000    1.000
## n         2'059      125
## np      94.277%   5.723%
## NAs         361       91
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 1.7759, df = 1, p-value = 0.1827
## 
## 
## Warning:
##   Grouping variable contains 972 NAs (26.9%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##        
##               1   (1,2]   (2,3]   (3,4]
##   FALSE   93.7%   93.9%   95.4%   94.7%
##   TRUE     6.3%    6.1%    4.6%    5.3%

# Describe IND_OPEN against the three numeric board measures, using only the
# rows where FAC_ROLE is "editor".
journal_board_analysis.tbl %>%
  filter(FAC_ROLE == "editor") %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
## 
## Summary: 
## n pairs: 5'454, valid: 4'360 (79.9%), missings: 1'094 (20.1%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      5.820    9.370
## median    4.000    3.000
## sd        6.014   15.976
## IQR       5.000    7.000
## n         3'736      624
## np      85.688%  14.312%
## NAs           0        0
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 0.85307, df = 1, p-value = 0.3557
## 
## 
## Warning:
##   Grouping variable contains 1094 NAs (20.1%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##        
##              Q1      Q2      Q3      Q4
##   FALSE   84.5%   85.8%   88.8%   85.3%
##   TRUE    15.5%   14.2%   11.2%   14.7%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
## 
## Summary: 
## n pairs: 5'454, valid: 3'552 (65.1%), missings: 1'902 (34.9%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      0.641    0.723
## median    0.667    0.750
## sd        0.291    0.282
## IQR       0.364    0.429
## n         2'979      573
## np      83.868%  16.132%
## NAs         757       51
## 0s          262       37
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 44.709, df = 1, p-value = 2.286e-11
## 
## 
## Warning:
##   Grouping variable contains 1094 NAs (20.1%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##        
##              Q1      Q2      Q3      Q4
##   FALSE   88.4%   84.0%   85.6%   76.4%
##   TRUE    11.6%   16.0%   14.4%   23.6%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_SUBJECTS (.)
## 
## Summary: 
## n pairs: 5'454, valid: 3'334 (61.1%), missings: 2'120 (38.9%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      1.968    1.895
## median    2.000    2.000
## sd        0.874    0.858
## IQR       2.000    2.000
## n         3'028      306
## np      90.822%   9.178%
## NAs         708      318
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 1.9234, df = 1, p-value = 0.1655
## 
## 
## Warning:
##   Grouping variable contains 1094 NAs (20.1%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##        
##               1   (1,2]   (2,3]   (3,4]
##   FALSE   90.2%   90.5%   92.0%   91.4%
##   TRUE     9.8%    9.5%    8.0%    8.6%

# Describe IND_OPEN against the three numeric board measures, using only the
# rows where FAC_ROLE is "review".
journal_board_analysis.tbl %>%
  filter(FAC_ROLE == "review") %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
## 
## Summary: 
## n pairs: 5'129, valid: 4'155 (81.0%), missings: 974 (19.0%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean     11.115   20.005
## median   11.000   15.000
## sd        7.308   16.328
## IQR       9.000   14.000
## n         3'399      756
## np      81.805%  18.195%
## NAs           0        0
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 250.4, df = 1, p-value < 2.2e-16
## 
## 
## Warning:
##   Grouping variable contains 974 NAs (19%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##        
##              Q1      Q2      Q3      Q4
##   FALSE   92.3%   83.4%   84.4%   63.8%
##   TRUE     7.7%   16.6%   15.6%   36.2%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
## 
## Summary: 
## n pairs: 5'129, valid: 3'533 (68.9%), missings: 1'596 (31.1%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      0.705    0.746
## median    0.733    0.761
## sd        0.225    0.145
## IQR       0.274    0.181
## n         2'790      743
## np      78.970%  21.030%
## NAs         609       13
## 0s           76        3
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 10.464, df = 1, p-value = 0.001217
## 
## 
## Warning:
##   Grouping variable contains 974 NAs (19%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##        
##              Q1      Q2      Q3      Q4
##   FALSE   88.5%   73.5%   71.6%   82.7%
##   TRUE    11.5%   26.5%   28.4%   17.3%

## ------------------------------------------------------------------------------ 
## IND_OPEN ~ N_SUBJECTS (.)
## 
## Summary: 
## n pairs: 5'129, valid: 3'116 (60.8%), missings: 2'013 (39.2%), groups: 2
## 
##                         
##           FALSE     TRUE
## mean      1.944    1.891
## median    2.000    2.000
## sd        0.870    0.851
## IQR       2.000    2.000
## n         2'758      358
## np      88.511%  11.489%
## NAs         641      398
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 1.1105, df = 1, p-value = 0.292
## 
## 
## Warning:
##   Grouping variable contains 974 NAs (19%).
## 
## 
## 
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##        
##               1   (1,2]   (2,3]   (3,4]
##   FALSE   88.1%   87.9%   89.7%   89.4%
##   TRUE    11.9%   12.1%   10.3%   10.6%

# Linear model of PERCENT_ROLEGROUP_MALE on IND_OPEN, FAC_ROLE and N_SUBJECTS.
# FAC_ROLE is an ordered factor, so its terms show up as FAC_ROLE.L and
# FAC_ROLE.Q in the coefficient table.
lm.res <- journal_board_analysis.tbl %>%
  lm(
    PERCENT_ROLEGROUP_MALE ~ IND_OPEN + FAC_ROLE + N_SUBJECTS,
    data = .
  )

# TODO: GENDER BY DISCIPLINE

# TODO: Use subject codes

# Print coefficients and fit statistics for the model fitted above.
summary(lm.res)
## 
## Call:
## lm(formula = PERCENT_ROLEGROUP_MALE ~ IND_OPEN + FAC_ROLE + N_SUBJECTS, 
##     data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84647 -0.13718  0.05986  0.19609  0.36282 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.7133707  0.0085592  83.346  < 2e-16 ***
## IND_OPENTRUE 0.0405883  0.0109962   3.691 0.000225 ***
## FAC_ROLE.L   0.0706779  0.0065860  10.732  < 2e-16 ***
## FAC_ROLE.Q   0.0945218  0.0057947  16.312  < 2e-16 ***
## N_SUBJECTS   0.0009865  0.0039245   0.251 0.801529    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2783 on 6661 degrees of freedom
##   (8648 observations deleted due to missingness)
## Multiple R-squared:  0.04761,    Adjusted R-squared:  0.04704 
## F-statistic: 83.25 on 4 and 6661 DF,  p-value: < 2.2e-16
# Draw the default diagnostic plots for the fitted linear model.
plot(x = lm.res)

6 Results

  • Journal: field, OA status
  • Authors:

# Discussion

# References {#references .unnumbered}

# Appendix - Supplementary Tables

# Exploratory Analysis – Not For Submission
# Collect, as symbols, every workspace object whose name ends in
# "_analysis.tbl". The dot is escaped and the pattern is anchored because
# ls(pattern = ) takes a regular expression: an unescaped "." matches any
# character and an unanchored pattern also matches longer names.
explore.ls <- lapply(ls(pattern = "_analysis\\.tbl$"), sym)
library(skimr)

# Print a banner with the table name, then its skim() summary.
# The loop variable stays `i` and the table is passed as `eval(i)` so the
# "Name" field of the skim output is unchanged.
for (i in explore.ls) {
  cat("****\n")
  print(i)
  cat("****\n")
  print(skim(eval(i)))
}
## ****
## editors_analysis.tbl
## ****
## ── Data Summary ────────────────────────
##                            Values 
## Name                       eval(i)
## Number of rows             478562 
## Number of columns          7      
## _______________________           
## Column type frequency:            
##   character                4      
##   factor                   1      
##   logical                  2      
## ________________________          
## Group variables            None   
## 
## ── Variable type: character ────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate   min   max empty n_unique whitespace
## 1 NM_JOURNAL            6         1.00      0   146    16     6079          0
## 2 CAT_PUBLISHER         0         1         4    35     0       17          0
## 3 LS_COUNTRY            0         1         0     2 20778      191          0
## 4 LS_SUBJECTS      150584         0.685     2    14     0     1312          0
## 
## ── Variable type: factor ───────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate ordered n_unique
## 1 FAC_ROLE           9390         0.980 TRUE           3
##   top_counts                         
## 1 rev: 317646, edi: 145296, chi: 6230
## 
## ── Variable type: logical ──────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate  mean count                   
## 1 IND_MALE         164093         0.657 0.682 TRU: 214372, FAL: 100097
## 2 IND_OPEN          48009         0.900 0.541 TRU: 232941, FAL: 197612
## ****
## journal_analysis.tbl
## ****
## ── Data Summary ────────────────────────
##                            Values 
## Name                       eval(i)
## Number of rows             6080   
## Number of columns          5      
## _______________________           
## Column type frequency:            
##   character                1      
##   factor                   1      
##   list                     1      
##   logical                  1      
##   numeric                  1      
## ________________________          
## Group variables            None   
## 
## ── Variable type: character ────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate   min   max empty n_unique whitespace
## 1 CAT_PUBLISHER         0             1     4    35     0       17          0
## 
## ── Variable type: factor ───────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate ordered n_unique
## 1 FAC_ROLE             45         0.993 TRUE           3
##   top_counts                  
## 1 rev: 5129, edi: 872, chi: 34
## 
## ── Variable type: list ─────────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate n_unique min_length max_length
## 1 LIST_SUBJECTS      2468         0.594     1312          1          3
## 
## ── Variable type: logical ──────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate  mean count              
## 1 IND_OPEN           1289         0.788 0.165 FAL: 4000, TRU: 791
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate  mean    sd    p0   p25   p50   p75
## 1 N_SUBJECTS         2468         0.594  1.95 0.870     1     1     2     3
##    p100 hist 
## 1     4 ▇▇▁▆▁
## ****
## journal_board_analysis.tbl
## ****
## ── Data Summary ────────────────────────
##                            Values 
## Name                       eval(i)
## Number of rows             15314  
## Number of columns          9      
## _______________________           
## Column type frequency:            
##   character                2      
##   factor                   1      
##   list                     2      
##   logical                  1      
##   numeric                  3      
## ________________________          
## Group variables            None   
## 
## ── Variable type: character ────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate   min   max empty n_unique whitespace
## 1 NM_JOURNAL            1          1.00     0   146     2     6079          0
## 2 CAT_PUBLISHER         0          1        4    35     0       17          0
## 
## ── Variable type: factor ───────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate ordered n_unique
## 1 FAC_ROLE           1123         0.927 TRUE           3
##   top_counts                     
## 1 edi: 5454, rev: 5129, chi: 3608
## 
## ── Variable type: list ─────────────────────────────────────────────────────────
##   skim_variable            n_missing complete_rate n_unique min_length
## 1 LIST_SUBJECTS                 5975         0.610     1312          1
## 2 LIST_ROLEGROUP_COUNTRIES         0         1         9645          1
##   max_length
## 1          3
## 2       9179
## 
## ── Variable type: logical ──────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate  mean count                
## 1 IND_OPEN           3276         0.786 0.138 FAL: 10380, TRU: 1658
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##   skim_variable          n_missing complete_rate  mean    sd    p0   p25   p50
## 1 N_SUBJECTS                  5975         0.610 1.95  0.870     1 1      2   
## 2 N_ROLEGROUP_COUNTRIES          0         1     6.66  8.48      1 1      3   
## 3 PERCENT_ROLEGROUP_MALE      3104         0.797 0.701 0.300     0 0.538  0.75
##     p75  p100 hist 
## 1     3     4 ▇▇▁▆▁
## 2    10   106 ▇▁▁▁▁
## 3     1     1 ▂▁▃▅▇
library(DescTools)
#journal_analysis.tbl %>% select(-NM_JOURNAL) %>% Desc()
# Print a DescTools::Desc() report for every entry of explore.ls (defined
# elsewhere in the document). Each entry is resolved to a table with eval().
# NOTE: the loop variable has to stay `i` and the expression handed to Desc()
# has to stay as written, because Desc() echoes that expression text in the
# header of its report.
for (i in explore.ls) {
  # banner naming the table being described
  writeLines("****")
  print(i)
  writeLines("****")

  # name columns (NM_*) and list-columns are dropped before describing
  print(
    Desc(eval(i) %>% select(-starts_with("NM_"), -where(is.list)))
  )
}
## ****
## editors_analysis.tbl
## ****
## ------------------------------------------------------------------------------ 
## Describe eval(i) %>% select(-starts_with("NM_"), -where(is.list)) (tbl_df, tbl, data.frame):
## 
## data frame:  478562 obs. of  6 variables
##      212164 complete cases (44.3%)
## 
##   Nr  ColName        Class            NAs             Levels           
##   1   CAT_PUBLISHER  character             .                           
##   2   IND_MALE       logical          164093 (34.3%)                   
##   3   IND_OPEN       logical           48009 (10.0%)                   
##   4   LS_COUNTRY     character             .                           
##   5   LS_SUBJECTS    character        150584 (31.5%)                   
##   6   FAC_ROLE       ordered, factor    9390 (2.0%)   (3): 1-review,   
##                                                       2-editor, 3-chief
## 
## 
## ------------------------------------------------------------------------------ 
## 1 - CAT_PUBLISHER (character)
## 
##    length       n     NAs  unique  levels   dupes
##   478'562 478'562       0      17      17       y
##            100.0%    0.0%                        
## 
##                                  level     freq   perc  cumfreq  cumperc
## 1                      Frontiers Media  174'294  36.4%  174'294    36.4%
## 2                             Elsevier  108'961  22.8%  283'255    59.2%
## 3                                 SAGE   56'136  11.7%  339'391    70.9%
## 4                                 MDPI   35'063   7.3%  374'454    78.2%
## 5                              Emerald   18'486   3.9%  392'940    82.1%
## 6                         Inderscience   16'961   3.5%  409'901    85.7%
## 7                              Hindawi   13'424   2.8%  423'325    88.5%
## 8           Cambridge University Press   12'146   2.5%  435'471    91.0%
## 9                                 PLOS   10'643   2.2%  446'114    93.2%
## 10                          IGI Global    9'921   2.1%  456'035    95.3%
## 11                               Brill    5'961   1.2%  461'996    96.5%
## 12  American Psychological Association    3'740   0.8%  465'736    97.3%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 2 - IND_MALE (logical - dichotomous)
## 
##    length       n     NAs  unique
##   478'562 314'469 164'093       2
##             65.7%   34.3%        
## 
##           freq   perc  lci.95  uci.95'
## FALSE  100'097  31.8%   31.7%   32.0%
## TRUE   214'372  68.2%   68.0%   68.3%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## 3 - IND_OPEN (logical - dichotomous)
## 
##    length       n     NAs  unique
##   478'562 430'553  48'009       2
##             90.0%   10.0%        
## 
##           freq   perc  lci.95  uci.95'
## FALSE  197'612  45.9%   45.7%   46.0%
## TRUE   232'941  54.1%   54.0%   54.3%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## 4 - LS_COUNTRY (character)
## 
##    length       n     NAs  unique  levels   dupes
##   478'562 478'562       0     191     191       y
##            100.0%    0.0%                        
## 
##     level     freq   perc  cumfreq  cumperc
## 1      US  133'814  28.0%  133'814    28.0%
## 2      GB   39'538   8.3%  173'352    36.2%
## 3      IT   31'812   6.6%  205'164    42.9%
## 4      CN   27'418   5.7%  232'582    48.6%
## 5           20'778   4.3%  253'360    52.9%
## 6      DE   19'935   4.2%  273'295    57.1%
## 7      AU   17'841   3.7%  291'136    60.8%
## 8      FR   16'468   3.4%  307'604    64.3%
## 9      ES   16'154   3.4%  323'758    67.7%
## 10     CA   15'782   3.3%  339'540    71.0%
## 11     JP   11'786   2.5%  351'326    73.4%
## 12     IN    9'928   2.1%  361'254    75.5%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 5 - LS_SUBJECTS (character)
## 
##    length       n     NAs  unique  levels   dupes
##   478'562 327'978 150'584   1'312   1'312       y
##             68.5%   31.5%                        
## 
##              level    freq  perc  cumfreq  cumperc
## 1               MD  16'328  5.0%   16'328     5.0%
## 2   1109,1701,1702  13'227  4.0%   29'555     9.0%
## 3        1701,1702  12'719  3.9%   42'274    12.9%
## 4             1115   8'318  2.5%   50'592    15.4%
## 5   0606,1116,1701   8'218  2.5%   58'810    17.9%
## 6   0604,1103,1801   7'698  2.3%   66'508    20.3%
## 7             1112   7'575  2.3%   74'083    22.6%
## 8   0502,0503,0605   6'990  2.1%   81'073    24.7%
## 9             1103   6'910  2.1%   87'983    26.8%
## 10       1107,1108   6'861  2.1%   94'844    28.9%
## 11            0607   6'418  2.0%  101'262    30.9%
## 12            1503   5'982  1.8%  107'244    32.7%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 6 - FAC_ROLE (ordered, factor)
## 
##    length       n     NAs  unique  levels   dupes
##   478'562 469'172   9'390       3       3       y
##             98.0%    2.0%                        
## 
##     level     freq   perc  cumfreq  cumperc
## 1  review  317'646  67.7%  317'646    67.7%
## 2  editor  145'296  31.0%  462'942    98.7%
## 3   chief    6'230   1.3%  469'172   100.0%

## ****
## journal_analysis.tbl
## ****
## ------------------------------------------------------------------------------ 
## Describe eval(i) %>% select(-starts_with("NM_"), -where(is.list)) (tbl_df, tbl, data.frame):
## 
## data frame:  6080 obs. of  4 variables
##      3598 complete cases (59.2%)
## 
##   Nr  ColName        Class            NAs           Levels           
##   1   FAC_ROLE       ordered, factor    45 (0.7%)   (3): 1-review,   
##                                                     2-editor, 3-chief
##   2   CAT_PUBLISHER  character           .                           
##   3   N_SUBJECTS     numeric          2468 (40.6%)                   
##   4   IND_OPEN       logical          1289 (21.2%)                   
## 
## 
## ------------------------------------------------------------------------------ 
## 1 - FAC_ROLE (ordered, factor)
## 
##   length      n    NAs unique levels  dupes
##    6'080  6'035     45      3      3      y
##           99.3%   0.7%                     
## 
##     level   freq   perc  cumfreq  cumperc
## 1  review  5'129  85.0%    5'129    85.0%
## 2  editor    872  14.4%    6'001    99.4%
## 3   chief     34   0.6%    6'035   100.0%

## ------------------------------------------------------------------------------ 
## 2 - CAT_PUBLISHER (character)
## 
##   length      n    NAs unique levels  dupes
##    6'080  6'080      0     17     17      y
##          100.0%   0.0%                     
## 
##                          level   freq   perc  cumfreq  cumperc
## 1                     Elsevier  2'134  35.1%    2'134    35.1%
## 2                         SAGE  1'191  19.6%    3'325    54.7%
## 3                 Inderscience    470   7.7%    3'795    62.4%
## 4   Cambridge University Press    398   6.5%    4'193    69.0%
## 5                      Emerald    370   6.1%    4'563    75.0%
## 6                        Brill    279   4.6%    4'842    79.6%
## 7                         MDPI    274   4.5%    5'116    84.1%
## 8                      Hindawi    220   3.6%    5'336    87.8%
## 9                   IGI Global    220   3.6%    5'556    91.4%
## 10                    Pleiades    115   1.9%    5'671    93.3%
## 11                      Karger     99   1.6%    5'770    94.9%
## 12             Frontiers Media     92   1.5%    5'862    96.4%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 3 - N_SUBJECTS (numeric)
## 
##   length      n    NAs  unique    0s  mean  meanCI'
##    6'080  3'612  2'468       4     0  1.95    1.92
##           59.4%  40.6%          0.0%          1.98
##                                                   
##      .05    .10    .25  median   .75   .90     .95
##     1.00   1.00   1.00    2.00  3.00  3.00    3.00
##                                                   
##    range     sd  vcoef     mad   IQR  skew    kurt
##     3.00   0.87   0.45    1.48  2.00  0.35   -1.04
##                                                   
## 
##    level   freq   perc  cumfreq  cumperc
## 1      1  1'363  37.7%    1'363    37.7%
## 2      2  1'166  32.3%    2'529    70.0%
## 3      3    984  27.2%    3'513    97.3%
## 4      4     99   2.7%    3'612   100.0%
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 4 - IND_OPEN (logical - dichotomous)
## 
##   length      n    NAs unique
##    6'080  4'791  1'289      2
##           78.8%  21.2%       
## 
##         freq   perc  lci.95  uci.95'
## FALSE  4'000  83.5%   82.4%   84.5%
## TRUE     791  16.5%   15.5%   17.6%
## 
## ' 95%-CI (Wilson)

## ****
## journal_board_analysis.tbl
## ****
## ------------------------------------------------------------------------------ 
## Describe eval(i) %>% select(-starts_with("NM_"), -where(is.list)) (tbl_df, tbl, data.frame):
## 
## data frame:  15314 obs. of  6 variables
##      6666 complete cases (43.5%)
## 
##   Nr  ColName                 Class            NAs           Levels   
##   1   FAC_ROLE                ordered, factor  1123 (7.3%)   (3):     
##                                                              1-review,
##                                                              2-editor,
##                                                              3-chief  
##   2   CAT_PUBLISHER           character           .                   
##   3   N_SUBJECTS              numeric          5975 (39.0%)           
##   4   IND_OPEN                logical          3276 (21.4%)           
##   5   N_ROLEGROUP_COUNTRIES   integer             .                   
##   6   PERCENT_ROLEGROUP_MALE  numeric          3104 (20.3%)           
## 
## 
## ------------------------------------------------------------------------------ 
## 1 - FAC_ROLE (ordered, factor)
## 
##   length      n    NAs unique levels  dupes
##   15'314 14'191  1'123      3      3      y
##           92.7%   7.3%                     
## 
##     level   freq   perc  cumfreq  cumperc
## 1  review  5'129  36.1%    5'129    36.1%
## 2  editor  5'454  38.4%   10'583    74.6%
## 3   chief  3'608  25.4%   14'191   100.0%

## ------------------------------------------------------------------------------ 
## 2 - CAT_PUBLISHER (character)
## 
##   length      n    NAs unique levels  dupes
##   15'314 15'314      0     17     17      y
##          100.0%   0.0%                     
## 
##                          level   freq   perc  cumfreq  cumperc
## 1                     Elsevier  5'814  38.0%    5'814    38.0%
## 2                         SAGE  2'890  18.9%    8'704    56.8%
## 3                 Inderscience  1'310   8.6%   10'014    65.4%
## 4                      Emerald  1'289   8.4%   11'303    73.8%
## 5   Cambridge University Press    879   5.7%   12'182    79.5%
## 6                        Brill    629   4.1%   12'811    83.7%
## 7                         MDPI    536   3.5%   13'347    87.2%
## 8                   IGI Global    406   2.7%   13'753    89.8%
## 9                     Pleiades    344   2.2%   14'097    92.1%
## 10                     Hindawi    302   2.0%   14'399    94.0%
## 11                      Karger    257   1.7%   14'656    95.7%
## 12              John Benjamins    203   1.3%   14'859    97.0%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 3 - N_SUBJECTS (numeric)
## 
##   length      n    NAs  unique    0s  mean  meanCI'
##   15'314  9'339  5'975       4     0  1.95    1.93
##           61.0%  39.0%          0.0%          1.97
##                                                   
##      .05    .10    .25  median   .75   .90     .95
##     1.00   1.00   1.00    2.00  3.00  3.00    3.00
##                                                   
##    range     sd  vcoef     mad   IQR  skew    kurt
##     3.00   0.87   0.45    1.48  2.00  0.35   -1.04
##                                                   
## 
##    level   freq   perc  cumfreq  cumperc
## 1      1  3'541  37.9%    3'541    37.9%
## 2      2  3'000  32.1%    6'541    70.0%
## 3      3  2'544  27.2%    9'085    97.3%
## 4      4    254   2.7%    9'339   100.0%
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 4 - IND_OPEN (logical - dichotomous)
## 
##   length      n    NAs unique
##   15'314 12'038  3'276      2
##           78.6%  21.4%       
## 
##          freq   perc  lci.95  uci.95'
## FALSE  10'380  86.2%   85.6%   86.8%
## TRUE    1'658  13.8%   13.2%   14.4%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## 5 - N_ROLEGROUP_COUNTRIES (integer)
## 
##   length       n    NAs  unique     0s   mean  meanCI'
##   15'314  15'314      0      85      0   6.66    6.52
##           100.0%   0.0%           0.0%           6.79
##                                                      
##      .05     .10    .25  median    .75    .90     .95
##     1.00    1.00   1.00    3.00  10.00  17.00   21.00
##                                                      
##    range      sd  vcoef     mad    IQR   skew    kurt
##   105.00    8.48   1.27    2.97   9.00   3.30   18.65
##                                                      
## lowest : 1 (5'122), 2 (1'924), 3 (1'042), 4 (743), 5 (639)
## highest: 89, 90 (2), 91, 100, 106
## 
## heap(?): remarkable frequency (33.4%) for the mode(s) (= 1)
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 6 - PERCENT_ROLEGROUP_MALE (numeric)
## 
##      length          n        NAs     unique         0s        mean     meanCI'
##      15'314     12'210      3'104        986      1'082   0.7005036  0.6951806
##                  79.7%      20.3%                  7.1%              0.7058267
##                                                                               
##         .05        .10        .25     median        .75         .90        .95
##   0.0000000  0.2500000  0.5384615  0.7500000  1.0000000   1.0000000  1.0000000
##                                                                               
##       range         sd      vcoef        mad        IQR        skew       kurt
##   1.0000000  0.3000711  0.4283648  0.3706500  0.4615385  -0.9913193  0.1952624
##                                                                               
## lowest : 0.0 (1'082), 0.0526316, 0.0666667, 0.0833333 (2), 0.0909091
## highest: 0.9722222, 0.9736842, 0.975, 0.9787234, 1.0 (3'673)
## 
## heap(?): remarkable frequency (30.1%) for the mode(s) (= 1)
## 
## ' 95%-CI (classic)

library(corrr)

#' Pairwise association between all columns of a data frame
#'
#' Every ordered pair of columns (including each column with itself) is scored
#' with a measure chosen from the two column types:
#'   * numeric  vs numeric          -> `cor()` using `cor_method_numeric`
#'   * numeric  vs nominal/ordinal  -> sqrt(R^2) of `lm(numeric ~ other)`,
#'                                     reported with type "rsq"
#'   * ordinal  vs ordinal          -> `cor()` on the integer codes using
#'                                     `cor_method_ordinal`
#'   * nominal  vs nominal/ordinal  -> Cramer's V (`DescTools::CramerV`)
#'
#' Column types: ordered factors and logicals are "ordinal"; unordered factors
#' and character vectors are "nominal"; integer and double vectors are
#' "numeric". Any other column type triggers a warning and the pair is skipped.
#'
#' Adopted from https://stackoverflow.com/a/52557631/590437
#'  -- extended by Micah Altman to detect ordered factors, and to use DescTools
#'     rather than rcompanion
#'
#' @param df A data frame (or tibble) with named columns.
#' @param cor_method_numeric `method` passed to `cor()` for numeric pairs.
#' @param cor_method_ordinal `method` passed to `cor()` for ordinal pairs.
#' @param adjust_cramersv_bias Passed to `CramerV()` as `correct`.
#' @return A data frame with one row per scored pair and the columns
#'   `x`, `y`, `assoc`, `type`, `complete_obs_pairs`, `complete_obs_ratio`.
mixed_assoc <- function(df, cor_method_numeric = "pearson", cor_method_ordinal = "kendall",
                        adjust_cramersv_bias = TRUE) {

    # Classify a column exactly once, using scalar predicates only.
    # `class(x) %in% c("factor", "character")` yields a length-2 result for an
    # ordered factor (class c("ordered", "factor")), which is an error inside
    # `&&` on R >= 4.3. Ordered factors are tested first so they stay ordinal.
    var_kind <- function(v) {
        if (is.ordered(v) || is.logical(v)) {
            "ordinal"
        } else if (is.factor(v) || is.character(v)) {
            "nominal"
        } else if (is.integer(v) || is.double(v)) {
            "numeric"
        } else {
            NA_character_
        }
    }

    # Score a single (x, y) pair of column names; returns a one-row data frame,
    # or NULL when one of the columns has an unsupported type.
    assoc_pair <- function(xName, yName) {
        x <- df[[xName]]
        y <- df[[yName]]
        kinds <- c(var_kind(x), var_kind(y))

        if (anyNA(kinds)) {
            warning("unmatched column type combination: ",
                    paste(class(x), collapse = "/"), " vs ",
                    paste(class(y), collapse = "/"),
                    call. = FALSE)
            return(NULL)
        }

        if (all(kinds == "numeric")) {
            assoc <- cor(x, y, method = cor_method_numeric, use = "na.or.complete")
            type <- cor_method_numeric
        } else if (any(kinds == "numeric")) {
            # from https://stats.stackexchange.com/questions/119835/correlation-between-a-nominal-iv-and-a-continuous-dv-variable/124618#124618
            # the numeric column is always the response
            fit <- if (kinds[[1]] == "numeric") lm(x ~ y) else lm(y ~ x)
            assoc <- sqrt(summary(fit)$r.squared)
            type <- "rsq"
        } else if (all(kinds == "ordinal")) {
            assoc <- cor(as.integer(x), as.integer(y),
                         method = cor_method_ordinal, use = "na.or.complete")
            type <- cor_method_ordinal
        } else {
            # nominal/nominal and nominal/ordinal
            # use bias corrected cramersV as described in https://rdrr.io/cran/rcompanion/man/cramerV.html
            assoc <- CramerV(as.character(x), as.character(y),
                             correct = adjust_cramersv_bias)
            type <- "cramersV"
        }

        # finally add complete obs number and ratio to table
        n_complete <- sum(!is.na(x) & !is.na(y))
        data.frame(x = xName, y = yName, assoc = unname(assoc), type = type,
                   complete_obs_pairs = n_complete,
                   complete_obs_ratio = n_complete / length(x),
                   row.names = NULL)
    }

    # apply function to each variable combination
    var_names <- names(df)
    pairs <- expand.grid(X1 = var_names, X2 = var_names, stringsAsFactors = FALSE)
    rows <- Filter(Negate(is.null), Map(assoc_pair, pairs$X1, pairs$X2))
    if (length(rows) == 0) {
        return(data.frame())
    }

    # unname() so column names are never mistaken for rbind() arguments
    out <- do.call(rbind, unname(rows))
    rownames(out) <- NULL
    out
}
# For every entry of explore.ls (defined elsewhere in the document): print the
# pairwise association table of a random row sample, then attempt to draw it
# as a corrr network plot.
for (i in explore.ls) {
  # banner naming the table being analysed
  writeLines("****")
  print(i)
  writeLines("****")

  # associations are computed on a slice_sample(n = 1000) draw, after dropping
  # name columns (NM_*) and list-columns
  assoc.tbl <- eval(i) %>%
    select(-starts_with("NM_"), -where(is.list)) %>%
    slice_sample(n = 1000) %>%
    mixed_assoc()

  print(assoc.tbl)

  # Reshape the long table into an x-by-y matrix and plot it. The plot is
  # best-effort: network_plot() can fail (e.g. "NA values not allowed in 'd'"),
  # so the step is wrapped in try() to let the loop continue.
  try({
    print(
      assoc.tbl %>%
        select(x, y, assoc) %>%
        na.omit() %>%
        spread(y, assoc) %>%
        column_to_rownames("x") %>%
        as.matrix() %>%
        as_cordf() %>%
        network_plot()
    )
  })
}
## ****
## editors_analysis.tbl
## ****
##                x             y        assoc     type complete_obs_pairs
## 1  CAT_PUBLISHER CAT_PUBLISHER  1.000000000 cramersV               1000
## 2       IND_MALE CAT_PUBLISHER          NaN cramersV                660
## 3       IND_OPEN CAT_PUBLISHER  0.878409719 cramersV                906
## 4     LS_COUNTRY CAT_PUBLISHER  0.223342363 cramersV               1000
## 5    LS_SUBJECTS CAT_PUBLISHER  0.574557454 cramersV                701
## 6       FAC_ROLE CAT_PUBLISHER  0.250634162 cramersV                981
## 7  CAT_PUBLISHER      IND_MALE          NaN cramersV                660
## 8       IND_MALE      IND_MALE  1.000000000  kendall                660
## 9       IND_OPEN      IND_MALE -0.031760462  kendall                605
## 10    LS_COUNTRY      IND_MALE          NaN cramersV                660
## 11   LS_SUBJECTS      IND_MALE          NaN cramersV                456
## 12      FAC_ROLE      IND_MALE  0.006753203  kendall                647
## 13 CAT_PUBLISHER      IND_OPEN  0.878409719 cramersV                906
## 14      IND_MALE      IND_OPEN -0.031760462  kendall                605
## 15      IND_OPEN      IND_OPEN  1.000000000  kendall                906
## 16    LS_COUNTRY      IND_OPEN          NaN cramersV                906
## 17   LS_SUBJECTS      IND_OPEN  0.610196778 cramersV                701
## 18      FAC_ROLE      IND_OPEN -0.113641313  kendall                892
## 19 CAT_PUBLISHER    LS_COUNTRY  0.223342363 cramersV               1000
## 20      IND_MALE    LS_COUNTRY          NaN cramersV                660
## 21      IND_OPEN    LS_COUNTRY          NaN cramersV                906
## 22    LS_COUNTRY    LS_COUNTRY  1.000000000 cramersV               1000
## 23   LS_SUBJECTS    LS_COUNTRY          NaN cramersV                701
## 24      FAC_ROLE    LS_COUNTRY  0.000000000 cramersV                981
## 25 CAT_PUBLISHER   LS_SUBJECTS  0.574557454 cramersV                701
## 26      IND_MALE   LS_SUBJECTS          NaN cramersV                456
## 27      IND_OPEN   LS_SUBJECTS  0.610196778 cramersV                701
## 28    LS_COUNTRY   LS_SUBJECTS          NaN cramersV                701
## 29   LS_SUBJECTS   LS_SUBJECTS  1.000000000 cramersV                701
## 30      FAC_ROLE   LS_SUBJECTS          NaN cramersV                688
## 31 CAT_PUBLISHER      FAC_ROLE  0.250634162 cramersV                981
## 32      IND_MALE      FAC_ROLE  0.006753203  kendall                647
## 33      IND_OPEN      FAC_ROLE -0.113641313  kendall                892
## 34    LS_COUNTRY      FAC_ROLE  0.000000000 cramersV                981
## 35   LS_SUBJECTS      FAC_ROLE          NaN cramersV                688
## 36      FAC_ROLE      FAC_ROLE  1.000000000  kendall                981
##    complete_obs_ratio
## 1               1.000
## 2               0.660
## 3               0.906
## 4               1.000
## 5               0.701
## 6               0.981
## 7               0.660
## 8               0.660
## 9               0.605
## 10              0.660
## 11              0.456
## 12              0.647
## 13              0.906
## 14              0.605
## 15              0.906
## 16              0.906
## 17              0.701
## 18              0.892
## 19              1.000
## 20              0.660
## 21              0.906
## 22              1.000
## 23              0.701
## 24              0.981
## 25              0.701
## 26              0.456
## 27              0.701
## 28              0.701
## 29              0.701
## 30              0.688
## 31              0.981
## 32              0.647
## 33              0.892
## 34              0.981
## 35              0.688
## 36              0.981
## Error in stats::cmdscale(distance, k = 2) : NA values not allowed in 'd'
## ****
## journal_analysis.tbl
## ****
##                x             y       assoc     type complete_obs_pairs
## 1       FAC_ROLE      FAC_ROLE  1.00000000  kendall                989
## 2  CAT_PUBLISHER      FAC_ROLE  0.13983563 cramersV                989
## 3     N_SUBJECTS      FAC_ROLE  0.03841319      rsq                572
## 4       IND_OPEN      FAC_ROLE -0.10199771  kendall                777
## 5       FAC_ROLE CAT_PUBLISHER  0.13983563 cramersV                989
## 6  CAT_PUBLISHER CAT_PUBLISHER  1.00000000 cramersV               1000
## 7     N_SUBJECTS CAT_PUBLISHER  0.22701929      rsq                577
## 8       IND_OPEN CAT_PUBLISHER  0.69462192 cramersV                782
## 9       FAC_ROLE    N_SUBJECTS  0.03841319      rsq                572
## 10 CAT_PUBLISHER    N_SUBJECTS  0.22701929      rsq                577
## 11    N_SUBJECTS    N_SUBJECTS  1.00000000  pearson                577
## 12      IND_OPEN    N_SUBJECTS  0.08991528      rsq                577
## 13      FAC_ROLE      IND_OPEN -0.10199771  kendall                777
## 14 CAT_PUBLISHER      IND_OPEN  0.69462192 cramersV                782
## 15    N_SUBJECTS      IND_OPEN  0.08991528      rsq                577
## 16      IND_OPEN      IND_OPEN  1.00000000  kendall                782
##    complete_obs_ratio
## 1               0.989
## 2               0.989
## 3               0.572
## 4               0.777
## 5               0.989
## 6               1.000
## 7               0.577
## 8               0.782
## 9               0.572
## 10              0.577
## 11              0.577
## 12              0.577
## 13              0.777
## 14              0.782
## 15              0.577
## 16              0.782

## ****
## journal_board_analysis.tbl
## ****
##                         x                      y       assoc     type
## 1                FAC_ROLE               FAC_ROLE  1.00000000  kendall
## 2           CAT_PUBLISHER               FAC_ROLE  0.16262126 cramersV
## 3              N_SUBJECTS               FAC_ROLE  0.04831314      rsq
## 4                IND_OPEN               FAC_ROLE -0.10916146  kendall
## 5   N_ROLEGROUP_COUNTRIES               FAC_ROLE  0.47226446      rsq
## 6  PERCENT_ROLEGROUP_MALE               FAC_ROLE  0.22983142      rsq
## 7                FAC_ROLE          CAT_PUBLISHER  0.16262126 cramersV
## 8           CAT_PUBLISHER          CAT_PUBLISHER  1.00000000 cramersV
## 9              N_SUBJECTS          CAT_PUBLISHER  0.25507993      rsq
## 10               IND_OPEN          CAT_PUBLISHER  0.63317797 cramersV
## 11  N_ROLEGROUP_COUNTRIES          CAT_PUBLISHER  0.73919468      rsq
## 12 PERCENT_ROLEGROUP_MALE          CAT_PUBLISHER  0.25465060      rsq
## 13               FAC_ROLE             N_SUBJECTS  0.04831314      rsq
## 14          CAT_PUBLISHER             N_SUBJECTS  0.25507993      rsq
## 15             N_SUBJECTS             N_SUBJECTS  1.00000000  pearson
## 16               IND_OPEN             N_SUBJECTS  0.01520115      rsq
## 17  N_ROLEGROUP_COUNTRIES             N_SUBJECTS  0.04068278  pearson
## 18 PERCENT_ROLEGROUP_MALE             N_SUBJECTS -0.02242082  pearson
## 19               FAC_ROLE               IND_OPEN -0.10916146  kendall
## 20          CAT_PUBLISHER               IND_OPEN  0.63317797 cramersV
## 21             N_SUBJECTS               IND_OPEN  0.01520115      rsq
## 22               IND_OPEN               IND_OPEN  1.00000000  kendall
## 23  N_ROLEGROUP_COUNTRIES               IND_OPEN  0.33525753      rsq
## 24 PERCENT_ROLEGROUP_MALE               IND_OPEN  0.05062224      rsq
## 25               FAC_ROLE  N_ROLEGROUP_COUNTRIES  0.47226446      rsq
## 26          CAT_PUBLISHER  N_ROLEGROUP_COUNTRIES  0.73919468      rsq
## 27             N_SUBJECTS  N_ROLEGROUP_COUNTRIES  0.04068278  pearson
## 28               IND_OPEN  N_ROLEGROUP_COUNTRIES  0.33525753      rsq
## 29  N_ROLEGROUP_COUNTRIES  N_ROLEGROUP_COUNTRIES  1.00000000  pearson
## 30 PERCENT_ROLEGROUP_MALE  N_ROLEGROUP_COUNTRIES  0.02723793  pearson
## 31               FAC_ROLE PERCENT_ROLEGROUP_MALE  0.22983142      rsq
## 32          CAT_PUBLISHER PERCENT_ROLEGROUP_MALE  0.25465060      rsq
## 33             N_SUBJECTS PERCENT_ROLEGROUP_MALE -0.02242082  pearson
## 34               IND_OPEN PERCENT_ROLEGROUP_MALE  0.05062224      rsq
## 35  N_ROLEGROUP_COUNTRIES PERCENT_ROLEGROUP_MALE  0.02723793  pearson
## 36 PERCENT_ROLEGROUP_MALE PERCENT_ROLEGROUP_MALE  1.00000000  pearson
##    complete_obs_pairs complete_obs_ratio
## 1                 930              0.930
## 2                 930              0.930
## 3                 572              0.572
## 4                 733              0.733
## 5                 930              0.930
## 6                 737              0.737
## 7                 930              0.930
## 8                1000              1.000
## 9                 619              0.619
## 10                792              0.792
## 11               1000              1.000
## 12                793              0.793
## 13                572              0.572
## 14                619              0.619
## 15                619              0.619
## 16                619              0.619
## 17                619              0.619
## 18                469              0.469
## 19                733              0.733
## 20                792              0.792
## 21                619              0.619
## 22                792              0.792
## 23                792              0.792
## 24                615              0.615
## 25                930              0.930
## 26               1000              1.000
## 27                619              0.619
## 28                792              0.792
## 29               1000              1.000
## 30                793              0.793
## 31                737              0.737
## 32                793              0.793
## 33                469              0.469
## 34                615              0.615
## 35                793              0.793
## 36                793              0.793


  1. This method is intended for aggregate analysis and coarse (binary) classification, not for individual-level analysis – e.g. the assignment of a pronoun to a specific author. The classification reported in the table is based on the IPUMS corpus. Bootstrap resampling is used to compute confidence intervals – these reflect sampling error, but not measurement error arising from heuristic name extraction or uncertainty in name-to-gender assignment. As a sensitivity check for measurement error, we replicated our analyses using two other methods: the Social Security Administration database and the ‘Kantrowitz’ method (which is popular in the literature, but based on a much smaller corpus). The resulting range of estimates does not alter the overall substantive conclusions reported above.

    # Start the fully-coded editor table from the parsed table.
    editors_full.tbl <- editors_parse_c.tbl
    if (!doc_debug) {
        # NOTE(review): this removes `editors_parse.tbl`, while the table copied
        # above is `editors_parse_c.tbl` -- confirm which object is meant.
        rm("editors_parse.tbl")
    }
    
    ## role coding
    # Build a lookup with one row per distinct raw `role` string (n = number of
    # editor rows carrying it) and code it once, instead of coding every editor
    # row. The grouping is released with ungroup() so the string operations
    # below run vectorised over the whole lookup; no rowwise() pass is needed.
    role.tbl <- editors_full.tbl %>%
      select(role) %>%
      group_by(role) %>%
      count() %>%
      ungroup() %>%
      mutate(
        # title-cased copy, so the patterns below match regardless of the
        # capitalisation used in the raw role string
        rolec = str_to_title(role),
        # TRUE when the title contains Former / Past / Emerit
        CAT_ROLE_FORMER = str_detect(rolec, "(Former)|(Past)|(Emerit)"),
        # case_when() takes the FIRST matching clause, so the order matters:
        # specific titles come first, generic fallbacks ("Editor", "Review",
        # "Board", ...) last. Missing or unrecognised roles are coded "".
        CAT_ROLE = case_when(
          is.na(rolec) ~ "",
          str_detect(rolec, "(In Chief)|(In-Chief)") ~ "chief",
          str_detect(rolec, "Founding Editor") ~ "chief",
          str_detect(rolec, "Associate Editor") ~ "editor",
          str_detect(rolec, "Assistant Editor") ~ "editor",
          str_detect(rolec, "Senior Editor") ~ "editor",
          str_detect(rolec, "Book Review") ~ "editor",
          str_detect(rolec, "Academic Editor") ~ "review",
          str_detect(rolec, "Review Editor") ~ "review",
          str_detect(rolec, "Editorial Board") ~ "review",
          str_detect(rolec, "Advisory Board") ~ "review",
          str_detect(rolec, "Advisory Committee") ~ "review",
          str_detect(rolec, "Scientific Committee") ~ "review",
          str_detect(rolec, "Scientific Advisor") ~ "review",
          str_detect(rolec, "Editor") ~ "editor",
          str_detect(rolec, "Advisory") ~ "review",
          str_detect(rolec, "Review") ~ "review",
          str_detect(rolec, "Board") ~ "review",
          str_detect(rolec, "Academic") ~ "review",
          str_detect(rolec, "Members") ~ "review",
          TRUE ~ ""
        )
      )
    
    # Attach the coded role columns to every editor row, then drop the lookup.
    editors_full.tbl %<>%
      left_join(role.tbl %>% select(role, CAT_ROLE, CAT_ROLE_FORMER), by = c("role"))
    rm(role.tbl)
    ↩︎